Skip to content

Commit

Permalink
schedule: use a controller to manage schedulers (tikv#6773)
Browse files Browse the repository at this point in the history
ref tikv#5839

Signed-off-by: Ryan Leung <rleungx@gmail.com>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
rleungx and ti-chi-bot[bot] committed Nov 30, 2023
1 parent 3dc4928 commit 706a7b3
Show file tree
Hide file tree
Showing 9 changed files with 617 additions and 549 deletions.
330 changes: 24 additions & 306 deletions pkg/schedule/coordinator.go

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pkg/schedule/diagnostic/diagnostic_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ import (
// Manager is used to manage the diagnostic result of schedulers for now.
// It holds the persisted scheduling options together with a handle used to
// look schedulers up by name.
type Manager struct {
// config holds the persisted options.
// NOTE(review): presumably consulted to decide whether diagnostics are
// enabled; the check itself is not visible here, so confirm.
config *config.PersistOptions
// schedulerController is what GetDiagnosticResult and getSchedulerRecorder
// query to resolve a scheduler from its name.
schedulerController map[string]*schedulers.ScheduleController
schedulerController *schedulers.Controller
}

// NewManager creates a new Manager.
func NewManager(schedulerController map[string]*schedulers.ScheduleController, config *config.PersistOptions) *Manager {
func NewManager(schedulerController *schedulers.Controller, config *config.PersistOptions) *Manager {
return &Manager{
config: config,
schedulerController: schedulerController,
Expand All @@ -42,8 +42,8 @@ func (d *Manager) GetDiagnosticResult(name string) (*schedulers.DiagnosticResult
return nil, errs.ErrDiagnosticDisabled
}

scheduler, isSchedulerExisted := d.schedulerController[name]
if !isSchedulerExisted {
scheduler := d.schedulerController.GetScheduler(name)
if scheduler == nil {
ts := uint64(time.Now().Unix())
res := &schedulers.DiagnosticResult{Name: name, Timestamp: ts, Status: schedulers.Disabled}
return res, nil
Expand Down Expand Up @@ -75,5 +75,5 @@ func (d *Manager) GetDiagnosticResult(name string) (*schedulers.DiagnosticResult
}

// getSchedulerRecorder returns the diagnostic recorder of the scheduler
// registered under name.
//
// NOTE(review): the lookup result is used without a nil check, while
// GetDiagnosticResult treats a nil result from GetScheduler as "scheduler not
// found". Callers presumably need to make sure the scheduler exists before
// calling this helper; confirm that GetDiagnosticRecorder tolerates a nil
// receiver, or guard here.
func (d *Manager) getSchedulerRecorder(name string) *schedulers.DiagnosticRecorder {
return d.schedulerController[name].GetDiagnosticRecorder()
return d.schedulerController.GetScheduler(name).GetDiagnosticRecorder()
}
9 changes: 0 additions & 9 deletions pkg/schedule/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ var (
Help: "Status of the hotspot.",
}, []string{"address", "store", "type"})

schedulerStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "status",
Help: "Status of the scheduler.",
}, []string{"kind", "type"})

regionListGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Expand All @@ -51,7 +43,6 @@ var (
)

func init() {
prometheus.MustRegister(schedulerStatusGauge)
prometheus.MustRegister(hotSpotStatusGauge)
prometheus.MustRegister(regionListGauge)
prometheus.MustRegister(patrolCheckRegionsGauge)
Expand Down
309 changes: 160 additions & 149 deletions pkg/schedule/schedulers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,157 +16,168 @@ package schedulers

import "github.com/prometheus/client_golang/prometheus"

var schedulerCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "event_count",
Help: "Counter of scheduler events.",
}, []string{"type", "name"})

var schedulerStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "inner_status",
Help: "Inner status of the scheduler.",
}, []string{"type", "name"})

// TODO: pre-allocate gauge metrics
var opInfluenceStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "op_influence",
Help: "Store status for schedule",
}, []string{"scheduler", "store", "type"})

// TODO: pre-allocate gauge metrics
var tolerantResourceStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "tolerant_resource",
Help: "Store status for schedule",
}, []string{"scheduler"})

var balanceLeaderCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_leader",
Help: "Counter of balance leader scheduler.",
}, []string{"type", "store"})

var balanceWitnessCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_witness",
Help: "Counter of balance witness scheduler.",
}, []string{"type", "store"})

var balanceRegionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_region",
Help: "Counter of balance region scheduler.",
}, []string{"type", "store"})

// TODO: pre-allocate gauge metrics
var hotSchedulerResultCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_region",
Help: "Counter of hot region scheduler.",
}, []string{"type", "store"})

var balanceDirectionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_direction",
Help: "Counter of direction of balance related schedulers.",
}, []string{"type", "source", "target"})

// TODO: pre-allocate gauge metrics
var hotDirectionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_region_direction",
Help: "Counter of hot region scheduler.",
}, []string{"type", "rw", "store", "direction", "dim"})

var scatterRangeLeaderCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "scatter_range_leader",
Help: "Counter of scatter range leader scheduler.",
}, []string{"type", "store"})

var scatterRangeRegionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "scatter_range_region",
Help: "Counter of scatter range region scheduler.",
}, []string{"type", "store"})

var hotPendingStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_pending",
Help: "Pending influence status in hot region scheduler.",
}, []string{"type", "source", "target"})

var hotPeerHist = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_peer",
Help: "Bucketed histogram of the scheduling hot peer.",
Buckets: prometheus.ExponentialBuckets(1, 2, 30),
}, []string{"type", "rw", "dim"})

var storeSlowTrendEvictedStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_evicted_status",
Help: "Store evited by slow trend status for schedule",
}, []string{"address", "store"})

var storeSlowTrendActionStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_action_status",
Help: "Store trend scheduler calculating actions",
}, []string{"reason"})

var storeSlowTrendMiscGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_misc",
Help: "Store trend internal uncatelogued values",
}, []string{"type"})

// HotPendingSum is the sum of pending influence in hot region scheduler.
var HotPendingSum = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_pending_sum",
Help: "Pending influence sum of store in hot region scheduler.",
}, []string{"store", "rw", "dim"})
// Prometheus collectors of the schedulers package. Every collector below uses
// Namespace "pd" and Subsystem "scheduler", so the exported metric names all
// start with "pd_scheduler_".
var (
// schedulerStatusGauge is a gauge vector exported as pd_scheduler_status,
// labeled by "kind" and "type".
schedulerStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "status",
Help: "Status of the scheduler.",
}, []string{"kind", "type"})

// schedulerCounter is a counter vector exported as
// pd_scheduler_event_count, labeled by "type" and "name".
schedulerCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "event_count",
Help: "Counter of scheduler events.",
}, []string{"type", "name"})

// schedulerStatus is a gauge vector exported as pd_scheduler_inner_status,
// labeled by "type" and "name". It is distinct from schedulerStatusGauge,
// which is exported as pd_scheduler_status.
schedulerStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "inner_status",
Help: "Inner status of the scheduler.",
}, []string{"type", "name"})

// opInfluenceStatus is a gauge vector exported as
// pd_scheduler_op_influence, labeled by "scheduler", "store" and "type".
// TODO: pre-allocate gauge metrics
opInfluenceStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "op_influence",
Help: "Store status for schedule",
}, []string{"scheduler", "store", "type"})

// tolerantResourceStatus is a gauge vector exported as
// pd_scheduler_tolerant_resource, labeled by "scheduler". Its Help text is
// identical to the one used by opInfluenceStatus.
// TODO: pre-allocate gauge metrics
tolerantResourceStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "tolerant_resource",
Help: "Store status for schedule",
}, []string{"scheduler"})

// balanceLeaderCounter is a counter vector exported as
// pd_scheduler_balance_leader, labeled by "type" and "store".
balanceLeaderCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_leader",
Help: "Counter of balance leader scheduler.",
}, []string{"type", "store"})

// balanceWitnessCounter is a counter vector exported as
// pd_scheduler_balance_witness, labeled by "type" and "store".
balanceWitnessCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_witness",
Help: "Counter of balance witness scheduler.",
}, []string{"type", "store"})

// balanceRegionCounter is a counter vector exported as
// pd_scheduler_balance_region, labeled by "type" and "store".
balanceRegionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_region",
Help: "Counter of balance region scheduler.",
}, []string{"type", "store"})

// hotSchedulerResultCounter is a counter vector exported as
// pd_scheduler_hot_region, labeled by "type" and "store".
// NOTE(review): the TODO below says "gauge" although this is a counter.
// TODO: pre-allocate gauge metrics
hotSchedulerResultCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_region",
Help: "Counter of hot region scheduler.",
}, []string{"type", "store"})

// balanceDirectionCounter is a counter vector exported as
// pd_scheduler_balance_direction, labeled by "type", "source" and "target".
balanceDirectionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "balance_direction",
Help: "Counter of direction of balance related schedulers.",
}, []string{"type", "source", "target"})

// hotDirectionCounter is a counter vector exported as
// pd_scheduler_hot_region_direction, labeled by "type", "rw", "store",
// "direction" and "dim". Its Help text is identical to the one used by
// hotSchedulerResultCounter.
// NOTE(review): the TODO below says "gauge" although this is a counter.
// TODO: pre-allocate gauge metrics
hotDirectionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_region_direction",
Help: "Counter of hot region scheduler.",
}, []string{"type", "rw", "store", "direction", "dim"})

// scatterRangeLeaderCounter is a counter vector exported as
// pd_scheduler_scatter_range_leader, labeled by "type" and "store".
scatterRangeLeaderCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "scatter_range_leader",
Help: "Counter of scatter range leader scheduler.",
}, []string{"type", "store"})

// scatterRangeRegionCounter is a counter vector exported as
// pd_scheduler_scatter_range_region, labeled by "type" and "store".
scatterRangeRegionCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "scatter_range_region",
Help: "Counter of scatter range region scheduler.",
}, []string{"type", "store"})

// hotPendingStatus is a gauge vector exported as pd_scheduler_hot_pending,
// labeled by "type", "source" and "target".
hotPendingStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_pending",
Help: "Pending influence status in hot region scheduler.",
}, []string{"type", "source", "target"})

// hotPeerHist is a histogram vector exported as pd_scheduler_hot_peer,
// labeled by "type", "rw" and "dim". Its 30 bucket upper bounds start at 1
// and double at every step.
hotPeerHist = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_peer",
Help: "Bucketed histogram of the scheduling hot peer.",
Buckets: prometheus.ExponentialBuckets(1, 2, 30),
}, []string{"type", "rw", "dim"})

// storeSlowTrendEvictedStatusGauge is a gauge vector exported as
// pd_scheduler_store_slow_trend_evicted_status, labeled by "address" and
// "store".
// NOTE(review): the Help text spells "evited", probably meant as "evicted".
// It is a runtime string, so it is left untouched here.
storeSlowTrendEvictedStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_evicted_status",
Help: "Store evited by slow trend status for schedule",
}, []string{"address", "store"})

// storeSlowTrendActionStatusGauge is a gauge vector exported as
// pd_scheduler_store_slow_trend_action_status, labeled by "reason".
storeSlowTrendActionStatusGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_action_status",
Help: "Store trend scheduler calculating actions",
}, []string{"reason"})

// storeSlowTrendMiscGauge is a gauge vector exported as
// pd_scheduler_store_slow_trend_misc, labeled by "type".
// NOTE(review): the Help text spells "uncatelogued", probably meant as
// "uncatalogued". It is a runtime string, so it is left untouched here.
storeSlowTrendMiscGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "store_slow_trend_misc",
Help: "Store trend internal uncatelogued values",
}, []string{"type"})

// HotPendingSum is the sum of pending influence in hot region scheduler.
// It is a gauge vector exported as pd_scheduler_hot_pending_sum, labeled by
// "store", "rw" and "dim", and is the only exported collector in this group.
HotPendingSum = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "scheduler",
Name: "hot_pending_sum",
Help: "Pending influence sum of store in hot region scheduler.",
}, []string{"store", "rw", "dim"})
)

func init() {
prometheus.MustRegister(schedulerStatusGauge)
prometheus.MustRegister(schedulerCounter)
prometheus.MustRegister(schedulerStatus)
prometheus.MustRegister(balanceLeaderCounter)
Expand Down

0 comments on commit 706a7b3

Please sign in to comment.