Skip to content

Commit

Permalink
ddl: make local sort generate only one subtask for each instance (#50925
Browse files Browse the repository at this point in the history
)

ref #48795
  • Loading branch information
tangenta committed Feb 5, 2024
1 parent 1621e0f commit 613d999
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 13 deletions.
1 change: 0 additions & 1 deletion pkg/ddl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,6 @@ go_test(
"//pkg/util/sem",
"//pkg/util/sqlexec",
"//pkg/util/timeutil",
"@com_github_docker_go_units//:go-units",
"@com_github_ngaut_pools//:pools",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
Expand Down
8 changes: 3 additions & 5 deletions pkg/ddl/backfilling_dist_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/store/helper"
"github.com/pingcap/tidb/pkg/table"
"github.com/pingcap/tidb/pkg/util/logutil"
Expand Down Expand Up @@ -320,12 +319,11 @@ func generateNonPartitionPlan(

func calculateRegionBatch(totalRegionCnt int, instanceCnt int, useLocalDisk bool) int {
var regionBatch int
avgTasksPerInstance := totalRegionCnt / instanceCnt
avgTasksPerInstance := (totalRegionCnt + instanceCnt - 1) / instanceCnt // ceiling
if useLocalDisk {
// Make subtask large enough to reduce the overhead of local/global flush.
avgTasksPerDisk := int(int64(variable.DDLDiskQuota.Load()) / int64(config.SplitRegionSize))
regionBatch = min(avgTasksPerDisk, avgTasksPerInstance)
regionBatch = avgTasksPerInstance
} else {
// For cloud storage, each subtask should contain no more than 100 regions.
regionBatch = min(100, avgTasksPerInstance)
}
regionBatch = max(regionBatch, 1)
Expand Down
10 changes: 3 additions & 7 deletions pkg/ddl/backfilling_dist_scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"testing"
"time"

"github.com/docker/go-units"
"github.com/ngaut/pools"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/br/pkg/lightning/backend/external"
Expand All @@ -32,7 +31,6 @@ import (
"github.com/pingcap/tidb/pkg/meta"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/stretchr/testify/require"
"github.com/tikv/client-go/v2/util"
Expand Down Expand Up @@ -115,21 +113,19 @@ func TestBackfillingSchedulerLocalMode(t *testing.T) {
func TestCalculateRegionBatch(t *testing.T) {
// Test calculate in cloud storage.
batchCnt := ddl.CalculateRegionBatchForTest(100, 8, false)
require.Equal(t, 12, batchCnt)
require.Equal(t, 13, batchCnt)
batchCnt = ddl.CalculateRegionBatchForTest(2, 8, false)
require.Equal(t, 1, batchCnt)
batchCnt = ddl.CalculateRegionBatchForTest(8, 8, false)
require.Equal(t, 1, batchCnt)

// Test calculate in local storage.
variable.DDLDiskQuota.Store(96 * units.MiB * 1000)
batchCnt = ddl.CalculateRegionBatchForTest(100, 8, true)
require.Equal(t, 12, batchCnt)
require.Equal(t, 13, batchCnt)
batchCnt = ddl.CalculateRegionBatchForTest(2, 8, true)
require.Equal(t, 1, batchCnt)
variable.DDLDiskQuota.Store(96 * units.MiB * 2)
batchCnt = ddl.CalculateRegionBatchForTest(24, 8, true)
require.Equal(t, 2, batchCnt)
require.Equal(t, 3, batchCnt)
}

func TestBackfillingSchedulerGlobalSortMode(t *testing.T) {
Expand Down

0 comments on commit 613d999

Please sign in to comment.