br: fix the issue that config is not paused with TTL correctly (#31725)
close #31733
3pointer committed May 10, 2022
1 parent c789478 commit e9e1e53
Showing 2 changed files with 30 additions and 8 deletions.
17 changes: 13 additions & 4 deletions br/pkg/pdutil/pd.go
@@ -38,6 +38,7 @@ const (
schedulerPrefix = "pd/api/v1/schedulers"
maxMsgSize = int(128 * units.MiB) // pd.ScanRegion may return a large response
scheduleConfigPrefix = "pd/api/v1/config/schedule"
configPrefix = "pd/api/v1/config"
pauseTimeout = 5 * time.Minute

// pd request retry time when connection fails
@@ -541,12 +542,20 @@ func (p *PdController) UpdatePDScheduleConfig(ctx context.Context) error {
func (p *PdController) doUpdatePDScheduleConfig(
ctx context.Context, cfg map[string]interface{}, post pdHTTPRequest, prefixs ...string,
) error {
prefix := scheduleConfigPrefix
prefix := configPrefix
if len(prefixs) != 0 {
prefix = prefixs[0]
}
newCfg := make(map[string]interface{})
for k, v := range cfg {
// if we want to use TTL, we need to use the config prefix first,
// which means keys in cfg should be transformed from "max-merge-region-keys" to "schedule.max-merge-region-keys".
sc := fmt.Sprintf("schedule.%s", k)
newCfg[sc] = v
}

for _, addr := range p.addrs {
reqData, err := json.Marshal(cfg)
reqData, err := json.Marshal(newCfg)
if err != nil {
return errors.Trace(err)
}
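
For illustration only, here is a minimal, self-contained Go sketch of the key rewrite introduced above; the helper name prefixScheduleKeys is hypothetical and not part of BR, but the transformation mirrors the loop added to doUpdatePDScheduleConfig.

package main

import (
	"encoding/json"
	"fmt"
)

// prefixScheduleKeys rewrites plain schedule option names such as
// "max-merge-region-keys" into "schedule.max-merge-region-keys", which is the
// form the generic config endpoint (pd/api/v1/config) expects.
func prefixScheduleKeys(cfg map[string]interface{}) map[string]interface{} {
	newCfg := make(map[string]interface{}, len(cfg))
	for k, v := range cfg {
		newCfg[fmt.Sprintf("schedule.%s", k)] = v
	}
	return newCfg
}

func main() {
	cfg := map[string]interface{}{"max-merge-region-keys": 0}
	body, _ := json.Marshal(prefixScheduleKeys(cfg))
	fmt.Println(string(body)) // {"schedule.max-merge-region-keys":0}
}
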
@@ -562,7 +571,7 @@ func (p *PdController) doUpdatePDScheduleConfig(

func (p *PdController) doPauseConfigs(ctx context.Context, cfg map[string]interface{}, post pdHTTPRequest) error {
// pause these configs for 300 seconds
prefix := fmt.Sprintf("%s?ttlSecond=%.0f", scheduleConfigPrefix, pauseTimeout.Seconds())
prefix := fmt.Sprintf("%s?ttlSecond=%.0f", configPrefix, pauseTimeout.Seconds())
return p.doUpdatePDScheduleConfig(ctx, cfg, post, prefix)
}
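
As a rough sketch of the pause mechanism (this is not BR's actual pdHTTPRequest plumbing; the helper and the PD address below are assumptions for illustration), the already "schedule."-prefixed options are POSTed to the generic config endpoint with a ttlSecond parameter, so PD drops the temporary values again after pauseTimeout:

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

const (
	configPrefix = "pd/api/v1/config" // generic config endpoint, as in pd.go
	pauseTimeout = 5 * time.Minute    // same 300-second window as in pd.go
)

// pauseConfigs is a hypothetical, simplified stand-in for doPauseConfigs: it
// POSTs the options to the config endpoint with a TTL so they expire on their own.
func pauseConfigs(ctx context.Context, pdAddr string, cfg map[string]interface{}) error {
	body, err := json.Marshal(cfg)
	if err != nil {
		return err
	}
	url := fmt.Sprintf("http://%s/%s?ttlSecond=%.0f", pdAddr, configPrefix, pauseTimeout.Seconds())
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("pause config failed: %s", resp.Status)
	}
	return nil
}

func main() {
	cfg := map[string]interface{}{"schedule.max-merge-region-size": 0}
	// Replace 127.0.0.1:2379 with a real PD address to try this out.
	if err := pauseConfigs(context.Background(), "127.0.0.1:2379", cfg); err != nil {
		fmt.Println("request failed:", err)
	}
}
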

@@ -584,7 +593,7 @@ func restoreSchedulers(ctx context.Context, pd *PdController, clusterCfg Cluster
prefix := make([]string, 0, 1)
if pd.isPauseConfigEnabled() {
// set the config's TTL to zero to make the temporary config invalid immediately.
prefix = append(prefix, fmt.Sprintf("%s?ttlSecond=%d", scheduleConfigPrefix, 0))
prefix = append(prefix, fmt.Sprintf("%s?ttlSecond=%d", configPrefix, 0))
}
// reset config with previous value.
if err := pd.doUpdatePDScheduleConfig(ctx, mergeCfg, pdRequest, prefix...); err != nil {
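
And a very small sketch of the restore side, under the same assumed constant: a TTL of zero invalidates the temporary config immediately, after which the saved cluster values are written back with a regular update.

package main

import "fmt"

const configPrefix = "pd/api/v1/config" // assumed, matching the constant in pd.go

func main() {
	// ttlSecond=0 drops the paused (temporary) config at once, so the
	// follow-up update in restoreSchedulers restores the previous values.
	resetPrefix := fmt.Sprintf("%s?ttlSecond=%d", configPrefix, 0)
	fmt.Println(resetPrefix) // pd/api/v1/config?ttlSecond=0
}
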
21 changes: 17 additions & 4 deletions br/tests/br_other/run.sh
@@ -89,10 +89,23 @@ run_curl "https://localhost:$PPROF_PORT/debug/pprof/trace?seconds=1" &>/dev/null
echo "pprof started..."

run_curl https://$PD_ADDR/pd/api/v1/config/schedule | grep '"disable": false'
run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."enable-location-replacement"' | grep "false"
run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-pending-peer-count"' | grep "2147483647"
run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep -E "^0$"
run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep -E "^0$"

# after applying the pause API, these configs won't change any more.
# run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."enable-location-replacement"' | grep "false"
# run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-pending-peer-count"' | grep "2147483647"
# run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep -E "^0$"
# run_curl https://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep -E "^0$"

# after https://github.com/tikv/pd/pull/4781 is merged,
# we can use a hack to check whether pausing the config succeeded:
# set a paused config again and expect it to fail with a clear message.
run_pd_ctl -u https://$PD_ADDR config set max-merge-region-size 0 | grep -q "need to clean up TTL first for schedule.max-merge-region-size"
run_pd_ctl -u https://$PD_ADDR config set max-merge-region-keys 0 | grep -q "need to clean up TTL first for schedule.max-merge-region-keys"
run_pd_ctl -u https://$PD_ADDR config set max-pending-peer-count 0 | grep -q "need to clean up TTL first for schedule.max-pending-peer-count"
run_pd_ctl -u https://$PD_ADDR config set enable-location-replacement false | grep -q "need to clean up TTL first for schedule.enable-location-replacement"
run_pd_ctl -u https://$PD_ADDR config set leader-schedule-limit 0 | grep -q "need to clean up TTL first for schedule.leader-schedule-limit"
run_pd_ctl -u https://$PD_ADDR config set region-schedule-limit 0 | grep -q "need to clean up TTL first for schedule.region-schedule-limit"
run_pd_ctl -u https://$PD_ADDR config set max-snapshot-count 0 | grep -q "need to clean up TTL first for schedule.max-snapshot-count"

backup_fail=0
# generate 1.sst to make another backup failed.
