Skip to content

Commit

Permalink
sql: add session setting for scan costing methodology
Browse files Browse the repository at this point in the history
This PR adds a session setting that gates an upcoming feature in which the
optimizer uses `AvgSize`, the average column size from table stats, to cost
scans and index joins. When the setting is enabled, the optimizer reverts to
the old default method of costing scans, in which every column is treated as
the same size. By default, this setting is off.

Informs: cockroachdb#72332

Release note: None
  • Loading branch information
rharding6373 committed Jan 26, 2022
1 parent 81c447d commit af32d9d
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 52 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ sql.cross_db_fks.enabled boolean false if true, creating foreign key references
sql.cross_db_sequence_owners.enabled boolean false if true, creating sequences owned by tables from other databases is allowed
sql.cross_db_sequence_references.enabled boolean false if true, sequences referenced by tables from other databases are allowed
sql.cross_db_views.enabled boolean false if true, creating views that refer to other databases is allowed
sql.defaults.cost_scans_with_default_col_size.enabled boolean false setting to true uses the same size for all columns to compute scan cost
sql.defaults.datestyle enumeration iso, mdy default value for DateStyle session setting [iso, mdy = 0, iso, dmy = 1, iso, ymd = 2]
sql.defaults.datestyle.enabled boolean false default value for datestyle_enabled session setting
sql.defaults.default_hash_sharded_index_bucket_count integer 8 used as bucket count if bucket count is not specified in hash sharded index definition
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
<tr><td><code>sql.cross_db_sequence_owners.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if true, creating sequences owned by tables from other databases is allowed</td></tr>
<tr><td><code>sql.cross_db_sequence_references.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if true, sequences referenced by tables from other databases are allowed</td></tr>
<tr><td><code>sql.cross_db_views.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if true, creating views that refer to other databases is allowed</td></tr>
<tr><td><code>sql.defaults.cost_scans_with_default_col_size.enabled</code></td><td>boolean</td><td><code>false</code></td><td>setting to true uses the same size for all columns to compute scan cost</td></tr>
<tr><td><code>sql.defaults.datestyle</code></td><td>enumeration</td><td><code>iso, mdy</code></td><td>default value for DateStyle session setting [iso, mdy = 0, iso, dmy = 1, iso, ymd = 2]</td></tr>
<tr><td><code>sql.defaults.datestyle.enabled</code></td><td>boolean</td><td><code>false</code></td><td>default value for datestyle_enabled session setting</td></tr>
<tr><td><code>sql.defaults.default_hash_sharded_index_bucket_count</code></td><td>integer</td><td><code>8</code></td><td>used as bucket count if bucket count is not specified in hash sharded index definition</td></tr>
Expand Down
13 changes: 13 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,13 @@ var largeFullScanRows = settings.RegisterFloatSetting(
1000.0,
).WithPublic()

// costScansWithDefaultColSize is the cluster setting
// `sql.defaults.cost_scans_with_default_col_size.enabled`. Per its
// description, setting it to true uses the same size for all columns when
// computing scan cost. It is registered as tenant-writable and public, and
// defaults to false.
var costScansWithDefaultColSize = settings.RegisterBoolSetting(
settings.TenantWritable,
`sql.defaults.cost_scans_with_default_col_size.enabled`,
"setting to true uses the same size for all columns to compute scan cost",
false,
).WithPublic()

var errNoTransactionInProgress = errors.New("there is no transaction in progress")
var errTransactionInProgress = errors.New("there is already a transaction in progress")

Expand Down Expand Up @@ -3076,6 +3083,12 @@ func (m *sessionDataMutator) SetParallelizeMultiKeyLookupJoinsEnabled(val bool)
m.data.ParallelizeMultiKeyLookupJoinsEnabled = val
}

// SetCostScansWithDefaultColSize stores val in the session data's
// CostScansWithDefaultColSize field.
//
// TODO(harding): Remove this when costing scans based on average column size
// is fully supported.
func (m *sessionDataMutator) SetCostScansWithDefaultColSize(val bool) {
m.data.CostScansWithDefaultColSize = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/explain_bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ func (c *stmtEnvCollector) PrintSessionSettings(w io.Writer) error {
{sessionSetting: "datestyle_enabled", clusterSetting: dateStyleEnabled, convFunc: boolToOnOff},
{sessionSetting: "disallow_full_table_scans", clusterSetting: disallowFullTableScans, convFunc: boolToOnOff},
{sessionSetting: "large_full_scan_rows", clusterSetting: largeFullScanRows},
{sessionSetting: "cost_scans_with_default_col_size", clusterSetting: costScansWithDefaultColSize, convFunc: boolToOnOff},
{sessionSetting: "distsql", clusterSetting: DistSQLClusterExecMode, convFunc: distsqlConv},
{sessionSetting: "vectorize", clusterSetting: VectorizeClusterMode, convFunc: vectorizeConv},
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4645,6 +4645,7 @@ bytea_output hex
check_function_bodies on
client_encoding UTF8
client_min_messages notice
cost_scans_with_default_col_size off
database test
datestyle ISO, MDY
datestyle_enabled off
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -4045,6 +4045,7 @@ bytea_output hex NULL
check_function_bodies on NULL NULL NULL string
client_encoding UTF8 NULL NULL NULL string
client_min_messages notice NULL NULL NULL string
cost_scans_with_default_col_size off NULL NULL NULL string
database test NULL NULL NULL string
datestyle ISO, MDY NULL NULL NULL string
datestyle_enabled off NULL NULL NULL string
Expand Down Expand Up @@ -4158,6 +4159,7 @@ bytea_output hex NULL
check_function_bodies on NULL user NULL on on
client_encoding UTF8 NULL user NULL UTF8 UTF8
client_min_messages notice NULL user NULL notice notice
cost_scans_with_default_col_size off NULL user NULL off off
database test NULL user NULL · test
datestyle ISO, MDY NULL user NULL ISO, MDY ISO, MDY
datestyle_enabled off NULL user NULL off off
Expand Down Expand Up @@ -4265,6 +4267,7 @@ bytea_output NULL NULL NULL
check_function_bodies NULL NULL NULL NULL NULL
client_encoding NULL NULL NULL NULL NULL
client_min_messages NULL NULL NULL NULL NULL
cost_scans_with_default_col_size NULL NULL NULL NULL NULL
crdb_version NULL NULL NULL NULL NULL
database NULL NULL NULL NULL NULL
datestyle NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ bytea_output hex
check_function_bodies on
client_encoding UTF8
client_min_messages notice
cost_scans_with_default_col_size off
database test
datestyle ISO, MDY
datestyle_enabled off
Expand Down
71 changes: 37 additions & 34 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,22 +135,23 @@ type Memo struct {
// planning. We need to cross-check these before reusing a cached memo.
// NOTE: If you add new fields here, be sure to add them to the relevant
// fields in explain_bundle.go.
reorderJoinsLimit int
zigzagJoinEnabled bool
useHistograms bool
useMultiColStats bool
localityOptimizedSearch bool
safeUpdates bool
preferLookupJoinsForFKs bool
saveTablesPrefix string
dateStyleEnabled bool
intervalStyleEnabled bool
dateStyle pgdate.DateStyle
intervalStyle duration.IntervalStyle
propagateInputOrdering bool
disallowFullTableScans bool
largeFullScanRows float64
nullOrderedLast bool
reorderJoinsLimit int
zigzagJoinEnabled bool
useHistograms bool
useMultiColStats bool
localityOptimizedSearch bool
safeUpdates bool
preferLookupJoinsForFKs bool
saveTablesPrefix string
dateStyleEnabled bool
intervalStyleEnabled bool
dateStyle pgdate.DateStyle
intervalStyle duration.IntervalStyle
propagateInputOrdering bool
disallowFullTableScans bool
largeFullScanRows float64
nullOrderedLast bool
costScansWithDefaultColSize bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -180,23 +181,24 @@ func (m *Memo) Init(evalCtx *tree.EvalContext) {
// This initialization pattern ensures that fields are not unwittingly
// reused. Field reuse must be explicit.
*m = Memo{
metadata: m.metadata,
reorderJoinsLimit: int(evalCtx.SessionData().ReorderJoinsLimit),
zigzagJoinEnabled: evalCtx.SessionData().ZigzagJoinEnabled,
useHistograms: evalCtx.SessionData().OptimizerUseHistograms,
useMultiColStats: evalCtx.SessionData().OptimizerUseMultiColStats,
localityOptimizedSearch: evalCtx.SessionData().LocalityOptimizedSearch,
safeUpdates: evalCtx.SessionData().SafeUpdates,
preferLookupJoinsForFKs: evalCtx.SessionData().PreferLookupJoinsForFKs,
saveTablesPrefix: evalCtx.SessionData().SaveTablesPrefix,
intervalStyleEnabled: evalCtx.SessionData().IntervalStyleEnabled,
dateStyleEnabled: evalCtx.SessionData().DateStyleEnabled,
dateStyle: evalCtx.SessionData().GetDateStyle(),
intervalStyle: evalCtx.SessionData().GetIntervalStyle(),
propagateInputOrdering: evalCtx.SessionData().PropagateInputOrdering,
disallowFullTableScans: evalCtx.SessionData().DisallowFullTableScans,
largeFullScanRows: evalCtx.SessionData().LargeFullScanRows,
nullOrderedLast: evalCtx.SessionData().NullOrderedLast,
metadata: m.metadata,
reorderJoinsLimit: int(evalCtx.SessionData().ReorderJoinsLimit),
zigzagJoinEnabled: evalCtx.SessionData().ZigzagJoinEnabled,
useHistograms: evalCtx.SessionData().OptimizerUseHistograms,
useMultiColStats: evalCtx.SessionData().OptimizerUseMultiColStats,
localityOptimizedSearch: evalCtx.SessionData().LocalityOptimizedSearch,
safeUpdates: evalCtx.SessionData().SafeUpdates,
preferLookupJoinsForFKs: evalCtx.SessionData().PreferLookupJoinsForFKs,
saveTablesPrefix: evalCtx.SessionData().SaveTablesPrefix,
intervalStyleEnabled: evalCtx.SessionData().IntervalStyleEnabled,
dateStyleEnabled: evalCtx.SessionData().DateStyleEnabled,
dateStyle: evalCtx.SessionData().GetDateStyle(),
intervalStyle: evalCtx.SessionData().GetIntervalStyle(),
propagateInputOrdering: evalCtx.SessionData().PropagateInputOrdering,
disallowFullTableScans: evalCtx.SessionData().DisallowFullTableScans,
largeFullScanRows: evalCtx.SessionData().LargeFullScanRows,
nullOrderedLast: evalCtx.SessionData().NullOrderedLast,
costScansWithDefaultColSize: evalCtx.SessionData().CostScansWithDefaultColSize,
}
m.metadata.Init()
m.logPropsBuilder.init(evalCtx, m)
Expand Down Expand Up @@ -312,7 +314,8 @@ func (m *Memo) IsStale(
m.propagateInputOrdering != evalCtx.SessionData().PropagateInputOrdering ||
m.disallowFullTableScans != evalCtx.SessionData().DisallowFullTableScans ||
m.largeFullScanRows != evalCtx.SessionData().LargeFullScanRows ||
m.nullOrderedLast != evalCtx.SessionData().NullOrderedLast {
m.nullOrderedLast != evalCtx.SessionData().NullOrderedLast ||
m.costScansWithDefaultColSize != evalCtx.SessionData().CostScansWithDefaultColSize {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().NullOrderedLast = false
notStale()

// Stale enable cost scans with default column size.
evalCtx.SessionData().CostScansWithDefaultColSize = true
stale()
evalCtx.SessionData().CostScansWithDefaultColSize = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
36 changes: 20 additions & 16 deletions pkg/sql/sessiondatapb/local_only_session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ message LocalOnlySessionData {
string save_tables_prefix = 1;
// OptimizerFKCascadesLimit is the maximum number of cascading operations that
// are run for a single query.
int64 optimizer_fk_cascades_limit = 2 [(gogoproto.customname)="OptimizerFKCascadesLimit"];
int64 optimizer_fk_cascades_limit = 2 [(gogoproto.customname) = "OptimizerFKCascadesLimit"];
// StmtTimeout is the duration a query is permitted to run before it is
// canceled by the session. If set to 0, there is no timeout.
int64 stmt_timeout = 3 [(gogoproto.casttype)="time.Duration"];
int64 stmt_timeout = 3 [(gogoproto.casttype) = "time.Duration"];
// IdleInSessionTimeout is the duration a session is permitted to idle before
// the session is canceled. If set to 0, there is no timeout.
int64 idle_in_session_timeout = 4 [(gogoproto.casttype)="time.Duration"];
int64 idle_in_session_timeout = 4 [(gogoproto.casttype) = "time.Duration"];
// IdleInTransactionSessionTimeout is the duration a session is permitted to
// idle in a transaction before the session is canceled.
// If set to 0, there is no timeout.
int64 idle_in_transaction_session_timeout = 5 [(gogoproto.casttype)="time.Duration"];
int64 idle_in_transaction_session_timeout = 5 [(gogoproto.casttype) = "time.Duration"];
// NoticeDisplaySeverity indicates the level of Severity to send notices for the given
// session. This should ideally be of type pgnotice.DisplaySeverity, but cannot be done
// due to a circular dependency.
Expand Down Expand Up @@ -76,7 +76,7 @@ message LocalOnlySessionData {
bool safe_updates = 15;
// PreferLookupJoinsForFKs causes foreign key operations to prefer lookup
// joins.
bool prefer_lookup_joins_for_fks = 16 [(gogoproto.customname)="PreferLookupJoinsForFKs"];
bool prefer_lookup_joins_for_fks = 16 [(gogoproto.customname) = "PreferLookupJoinsForFKs"];
// ZigzagJoinEnabled indicates whether the optimizer should try and plan a
// zigzag join.
bool zigzag_join_enabled = 17;
Expand Down Expand Up @@ -140,29 +140,29 @@ message LocalOnlySessionData {
// ResultsBufferSize specifies the size at which the pgwire results buffer
// will self-flush.
int64 results_buffer_size = 38;
// PropagateInputOrdering indicates that when planning a subquery or CTE, the
// inner ordering should be propagated to the outer scope if the outer scope
// is unordered. PropagateInputOrdering is currently experimental.
bool propagate_input_ordering = 39;
// PropagateInputOrdering indicates that when planning a subquery or CTE, the
// inner ordering should be propagated to the outer scope if the outer scope
// is unordered. PropagateInputOrdering is currently experimental.
bool propagate_input_ordering = 39;
// ExperimentalDistSQLPlanningMode indicates whether the experimental
// DistSQL planning driven by the optimizer is enabled.
int64 experimental_distsql_planning_mode = 40 [
(gogoproto.customname)="ExperimentalDistSQLPlanningMode",
(gogoproto.casttype)="ExperimentalDistSQLPlanningMode"
(gogoproto.customname) = "ExperimentalDistSQLPlanningMode",
(gogoproto.casttype) = "ExperimentalDistSQLPlanningMode"
];
// DistSQLMode indicates whether to run queries using the distributed
// execution engine.
int64 dist_sql_mode = 41 [
(gogoproto.customname)="DistSQLMode",
(gogoproto.casttype)="DistSQLExecMode"
(gogoproto.customname) = "DistSQLMode",
(gogoproto.casttype) = "DistSQLExecMode"
];
// SerialNormalizationMode indicates how to handle the SERIAL pseudo-type.
int64 serial_normalization_mode = 42 [(gogoproto.casttype)="SerialNormalizationMode"];
int64 serial_normalization_mode = 42 [(gogoproto.casttype) = "SerialNormalizationMode"];
// NewSchemaChangerMode indicates whether to use the new schema changer.
int64 new_schema_changer_mode = 43 [(gogoproto.casttype)="NewSchemaChangerMode"];
int64 new_schema_changer_mode = 43 [(gogoproto.casttype) = "NewSchemaChangerMode"];
// SequenceCache stores sequence values which have been cached using the
// CACHE sequence option.
map<uint32, sessiondatapb.SequenceCacheEntry> sequence_cache = 44 [(gogoproto.casttype)="SequenceCache"];
map<uint32, sessiondatapb.SequenceCacheEntry> sequence_cache = 44 [(gogoproto.casttype) = "SequenceCache"];
// PlacementEnabled indicates whether PLACEMENT can be used or not.
bool placement_enabled = 45;
// SessionUserProto is the name of the user which has performed a SET ROLE
Expand Down Expand Up @@ -222,6 +222,10 @@ message LocalOnlySessionData {
// CheckFunctionBodies indicates whether functions are validated during
// creation.
bool check_function_bodies = 60;
// CostScansWithDefaultColSize is true when the optimizer should cost scans
// and joins using the same default number of bytes per column instead of
// column sizes from the AvgSize table statistic.
bool cost_scans_with_default_col_size = 61;

///////////////////////////////////////////////////////////////////////////
// WARNING: consider whether a session parameter you're adding needs to //
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/sessiondatapb/session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ message SessionData {
// attempting to acquire a lock on a key or while blocking on an existing
// lock in order to perform a non-locking read on a key.
google.protobuf.Duration lock_timeout = 15 [(gogoproto.nullable) = false,
(gogoproto.stdduration) = true];
(gogoproto.stdduration) = true];
// Internal is true if this query came from InternalExecutor or an internal
// planner.
bool internal = 16;
Expand Down Expand Up @@ -103,7 +103,7 @@ message DataConversionConfig {
// IntervalStyle indicates the style to parse and display intervals as.
util.duration.IntervalStyle interval_style = 3;
// DateStyle indicates the style to parse and display dates as.
util.timeutil.pgdate.DateStyle date_style = 4 [(gogoproto.nullable)=false];
util.timeutil.pgdate.DateStyle date_style = 4 [(gogoproto.nullable) = false];
}

// BytesEncodeFormat is the configuration for bytes to string conversions.
Expand Down
19 changes: 19 additions & 0 deletions pkg/sql/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,25 @@ var varGen = map[string]sessionVar{
return rowexec.ParallelizeMultiKeyLookupJoinsEnabled.String(sv)
},
},

// TODO(harding): Remove this when costing scans based on average column size
// is fully supported.
// CockroachDB extension.
//
// cost_scans_with_default_col_size is a boolean session variable backed by
// SessionData().CostScansWithDefaultColSize. Its default for new sessions is
// taken from the sql.defaults.cost_scans_with_default_col_size.enabled
// cluster setting.
`cost_scans_with_default_col_size`: {
	GetStringVal: makePostgresBoolGetStringValFn(`cost_scans_with_default_col_size`),
	// Set parses s as a boolean and stores it through the session data
	// mutator; a parse failure is returned to the caller unchanged.
	Set: func(_ context.Context, m sessionDataMutator, s string) error {
		b, err := paramparse.ParseBoolVar(`cost_scans_with_default_col_size`, s)
		if err != nil {
			return err
		}
		m.SetCostScansWithDefaultColSize(b)
		return nil
	},
	// Get reports the current session value formatted as a Postgres boolean
	// setting string.
	Get: func(evalCtx *extendedEvalContext) (string, error) {
		return formatBoolAsPostgresSetting(evalCtx.SessionData().CostScansWithDefaultColSize), nil
	},
	// Read the default from the cluster setting rather than hard-coding
	// false; otherwise changing the cluster setting would never affect the
	// default of new sessions.
	GlobalDefault: func(sv *settings.Values) string {
		return formatBoolAsPostgresSetting(costScansWithDefaultColSize.Get(sv))
	},
},
}

const compatErrMsg = "this parameter is currently recognized only for compatibility and has no effect in CockroachDB."
Expand Down

0 comments on commit af32d9d

Please sign in to comment.