Skip to content

Commit

Permalink
sql: modulo by prime in shard calculation for hash-sharded indexes
Browse files Browse the repository at this point in the history
We've discovered a case where a combination of non-random data and a
power-of-two number of buckets causes an uneven distribution of rows
in a hash-sharded index. While this specific case is very contrived, it
illustrates a small weakness in the current hash-shard calculation:
modulo by a power-of-two number of buckets only uses the last few bits
of the hash value.

Radu suggested this would be a problem in cockroachdb#67865 and also suggested a
fix: add an intermediate modulo by a larger prime before modulo by num
buckets, so we'll try that.

Fixes: cockroachdb#91109

Epic: None

Release note (performance improvement): This change updates the shard
calculation of newly-created hash-sharded indexes so that uneven
distributions of rows are less likely.
  • Loading branch information
michae2 committed Nov 2, 2022
1 parent 206fc07 commit f4620bb
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 82 deletions.
19 changes: 17 additions & 2 deletions pkg/sql/catalog/schemaexpr/hash_sharded_compute_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ package schemaexpr

import "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"

// HashShardPrime is the first prime number larger than MaxBucketAllowed. We
// modulo by HashShardPrime in the hash bucket calculation to include more bits
// of the hash value, in case the user has picked a power-of-two number of
// buckets.
const HashShardPrime uint32 = 2053

// MakeHashShardComputeExpr creates the serialized computed expression for a hash shard
// column based on the column names and the number of buckets. The expression will be
// of the form:
//
// mod(fnv32(crdb_internal.datums_to_bytes(...)),buckets)
// mod(mod(fnv32(crdb_internal.datums_to_bytes(...)),prime),buckets)
func MakeHashShardComputeExpr(colNames []string, buckets int) *string {
unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference {
return tree.ResolvableFunctionReference{
Expand Down Expand Up @@ -44,6 +50,15 @@ func MakeHashShardComputeExpr(colNames []string, buckets int) *string {
},
}
}
modPrime := func(expr tree.Expr) tree.Expr {
return &tree.FuncExpr{
Func: unresolvedFunc("mod"),
Exprs: tree.Exprs{
expr,
tree.NewDInt(tree.DInt(HashShardPrime)),
},
}
}
modBuckets := func(expr tree.Expr) tree.Expr {
return &tree.FuncExpr{
Func: unresolvedFunc("mod"),
Expand All @@ -53,6 +68,6 @@ func MakeHashShardComputeExpr(colNames []string, buckets int) *string {
},
}
}
res := tree.Serialize(modBuckets(hashedColumnsExpr()))
res := tree.Serialize(modBuckets(modPrime(hashedColumnsExpr())))
return &res
}
3 changes: 3 additions & 0 deletions pkg/sql/catalog/tabledesc/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ type ColumnDefDescs struct {
// hash-sharded index or primary key.
const MaxBucketAllowed = 2048

// Ensure that schemaexpr.HashShardPrime > MaxBucketAllowed.
const _ uint = uint(schemaexpr.HashShardPrime) - 1 - uint(MaxBucketAllowed)

// ColExprKind is an enum type of possible expressions on a column
// (e.g. 'DEFAULT' expression or 'ON UPDATE' expression).
type ColExprKind string
Expand Down
20 changes: 10 additions & 10 deletions pkg/sql/logictest/testdata/logic_test/alter_primary_key
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ t CREATE TABLE public.t (
z INT8 NOT NULL,
w INT8 NULL,
v JSONB NULL,
crdb_internal_z_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), 4:::INT8)) VIRTUAL,
crdb_internal_z_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(z)), 2053:::INT8), 4:::INT8)) VIRTUAL,
CONSTRAINT t_pkey PRIMARY KEY (y ASC),
UNIQUE INDEX i3 (z ASC) STORING (y),
UNIQUE INDEX t_x_key (x ASC),
Expand Down Expand Up @@ -423,8 +423,8 @@ t CREATE TABLE public.t (
x INT8 NOT NULL,
y INT8 NOT NULL,
z INT8 NULL,
crdb_internal_z_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), 5:::INT8)) VIRTUAL,
crdb_internal_y_shard_10 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), 10:::INT8)) VIRTUAL,
crdb_internal_z_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(z)), 2053:::INT8), 5:::INT8)) VIRTUAL,
crdb_internal_y_shard_10 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(y)), 2053:::INT8), 10:::INT8)) VIRTUAL,
CONSTRAINT t_pkey PRIMARY KEY (y ASC) USING HASH WITH (bucket_count=10),
UNIQUE INDEX t_x_key (x ASC),
INDEX i1 (z ASC) USING HASH WITH (bucket_count=5),
Expand Down Expand Up @@ -486,7 +486,7 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), 5:::INT8)) VIRTUAL,
crdb_internal_x_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(x)), 2053:::INT8), 5:::INT8)) VIRTUAL,
x INT8 NOT NULL,
y INT8 NOT NULL,
z INT8 NULL,
Expand Down Expand Up @@ -638,7 +638,7 @@ SHOW CREATE t
t CREATE TABLE public.t (
x INT8 NOT NULL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
crdb_internal_x_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), 4:::INT8)) VIRTUAL,
crdb_internal_x_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(x)), 2053:::INT8), 4:::INT8)) VIRTUAL,
CONSTRAINT t_pkey PRIMARY KEY (x ASC) USING HASH WITH (bucket_count=4)
)

Expand Down Expand Up @@ -1058,9 +1058,9 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), 2:::INT8)) VIRTUAL,
crdb_internal_x_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(x)), 2053:::INT8), 2:::INT8)) VIRTUAL,
x INT8 NOT NULL,
crdb_internal_x_shard_3 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), 3:::INT8)) VIRTUAL,
crdb_internal_x_shard_3 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(x)), 2053:::INT8), 3:::INT8)) VIRTUAL,
CONSTRAINT t_pkey PRIMARY KEY (x ASC) USING HASH WITH (bucket_count=3)
)

Expand All @@ -1079,10 +1079,10 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), 2:::INT8)) VIRTUAL,
crdb_internal_x_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(x)), 2053:::INT8), 2:::INT8)) VIRTUAL,
x INT8 NOT NULL,
y INT8 NOT NULL,
crdb_internal_y_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), 2:::INT8)) VIRTUAL,
crdb_internal_y_shard_2 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(y)), 2053:::INT8), 2:::INT8)) VIRTUAL,
CONSTRAINT t_pkey PRIMARY KEY (y ASC) USING HASH WITH (bucket_count=2),
UNIQUE INDEX t_x_key (x ASC) USING HASH WITH (bucket_count=2),
FAMILY fam_0_x_y (x, y)
Expand Down Expand Up @@ -1550,7 +1550,7 @@ SELECT @2 FROM [SHOW CREATE TABLE t_test_param]
CREATE TABLE public.t_test_param (
a INT8 NOT NULL,
b INT8 NOT NULL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 5:::INT8)) VIRTUAL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(b)), 2053:::INT8), 5:::INT8)) VIRTUAL,
CONSTRAINT t_test_param_pkey PRIMARY KEY (b ASC) USING HASH WITH (bucket_count=5),
UNIQUE INDEX t_test_param_a_key (a ASC),
FAMILY fam_0_a_b (a, b)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/create_index
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ CREATE TABLE public.t_hash (
pk INT8 NOT NULL,
a INT8 NULL,
b INT8 NULL,
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 5:::INT8)) VIRTUAL,
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(a)), 2053:::INT8), 5:::INT8)) VIRTUAL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(b)), 2053:::INT8), 5:::INT8)) VIRTUAL,
CONSTRAINT t_hash_pkey PRIMARY KEY (pk ASC),
INDEX idx_t_hash_a (a ASC) USING HASH WITH (bucket_count=5),
UNIQUE INDEX idx_t_hash_b (b ASC) USING HASH WITH (bucket_count=5),
Expand Down
10 changes: 5 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/create_table
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ SHOW CREATE TABLE like_hash
----
like_hash CREATE TABLE public.like_hash (
a INT8 NULL,
crdb_internal_a_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 4:::INT8)) VIRTUAL,
crdb_internal_a_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(a)), 2053:::INT8), 4:::INT8)) VIRTUAL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
CONSTRAINT like_hash_pkey PRIMARY KEY (rowid ASC),
INDEX like_hash_base_a_idx (a ASC) USING HASH WITH (bucket_count=4)
Expand All @@ -356,7 +356,7 @@ SHOW CREATE TABLE like_hash
----
like_hash CREATE TABLE public.like_hash (
a INT8 NULL,
crdb_internal_a_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 4:::INT8)) VIRTUAL,
crdb_internal_a_shard_4 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(a)), 2053:::INT8), 4:::INT8)) VIRTUAL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
CONSTRAINT like_hash_pkey PRIMARY KEY (rowid ASC),
INDEX like_hash_base_a_idx (a ASC) USING HASH WITH (bucket_count=4)
Expand Down Expand Up @@ -850,11 +850,11 @@ query T
SELECT @2 FROM [SHOW CREATE TABLE t_good_hash_indexes_1];
----
CREATE TABLE public.t_good_hash_indexes_1 (
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL,
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(a)), 2053:::INT8), 5:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b INT8 NULL,
c INT8 NULL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 5:::INT8)) VIRTUAL,
crdb_internal_b_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(b)), 2053:::INT8), 5:::INT8)) VIRTUAL,
CONSTRAINT t_good_hash_indexes_1_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5),
INDEX t_good_hash_indexes_1_b_idx (b ASC) USING HASH WITH (bucket_count=5)
)
Expand All @@ -870,7 +870,7 @@ SELECT @2 FROM [SHOW CREATE TABLE t_good_hash_indexes_2];
----
CREATE TABLE public.t_good_hash_indexes_2 (
a INT8 NOT NULL,
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL,
crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(mod(fnv32(crdb_internal.datums_to_bytes(a)), 2053:::INT8), 5:::INT8)) VIRTUAL,
CONSTRAINT t_good_hash_indexes_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5)
)

Expand Down

0 comments on commit f4620bb

Please sign in to comment.