Skip to content

Commit

Permalink
sql: fix crash when planning stats collection on virtual col with UDT
Browse files Browse the repository at this point in the history
CREATE STATISTICS executes in multiple layers: the "outer" layer is a
normal execution of the CREATE STATISTICS statement. During this
execution, we create, start, and await a CreateStats job. The
CreateStats job in turn starts an "inner" layer which uses a separate
internal transaction to plan and execute distributed statistics
collection.

Within the inner layer, we were using the job's planner (JobExecContext)
to plan distributed stats collection. This planner has no associated
transaction. If it weren't for user-defined types, this would be fine,
but user-defined types must be resolved using a transaction.

We had a hack in place to set the evalCtx.Txn to the internal
transaction in order to execute collection on normal columns with
user-defined type. But this hack did not work for virtual computed
columns with user-defined type, because type-checking their expressions
uses more facilites of the planner than just the evalCtx. (Specifically
the schemaResolver.)

So, instead of setting the evalCtx.Txn, we create a new "inner" planner
that is associated with the internal transaction of the inner
layer. This works for all columns with user-defined type.

Fixes: cockroachdb#123821

Release note (bug fix): Fix a crash introduced in v24.1.0-beta.2 that
could occur when planning statistics collection on a table with a
virtual computed column using a user-defined type when the
newly-introduced cluster setting
`sql.stats.virtual_computed_columns.enabled` is set to true. (The
setting was introduced in v24.1.0-alpha.1 default true.)
  • Loading branch information
michae2 committed May 13, 2024
1 parent 285bad7 commit 2ba4c10
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 13 deletions.
46 changes: 33 additions & 13 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ func StubTableStats(
// framework.
type createStatsNode struct {
tree.CreateStats

// p is the "outer planner" from planning the CREATE STATISTICS
// statement. When we startExec the createStatsNode, it creates a job which
// has a second planner (the JobExecContext). When the job resumes, it does
// its work using a retrying internal transaction for which we create a third
// "inner planner".
p *planner

// runAsJob is true by default, and causes the code below to be executed,
Expand Down Expand Up @@ -639,34 +645,48 @@ var _ jobs.Resumer = &createStatsResumer{}

// Resume is part of the jobs.Resumer interface.
func (r *createStatsResumer) Resume(ctx context.Context, execCtx interface{}) error {
p := execCtx.(JobExecContext)
// jobsPlanner is a second planner distinct from the "outer planner" in the
// createStatsNode. It comes from the jobs system and does not have an
// associated txn.
jobsPlanner := execCtx.(JobExecContext)
details := r.job.Details().(jobspb.CreateStatsDetails)
if details.Name == jobspb.AutoStatsName {
// We want to make sure that an automatic CREATE STATISTICS job only runs if
// there are no other CREATE STATISTICS jobs running, automatic or manual.
if err := checkRunningJobs(ctx, r.job, p); err != nil {
if err := checkRunningJobs(ctx, r.job, jobsPlanner); err != nil {
return err
}
}

r.tableID = details.Table.ID
evalCtx := p.ExtendedEvalContext()

dsp := p.DistSQLPlanner()
if err := p.ExecCfg().InternalDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
// Set the transaction on the EvalContext to this txn. This allows for
// use of the txn during processor setup during the execution of the flow.
evalCtx.Txn = txn.KV()

evalCtx := jobsPlanner.ExtendedEvalContext()

if err := jobsPlanner.ExecCfg().InternalDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
// We create a third "inner planner" associated with this txn in order to
// have (a) use of the txn during type checking of any virtual computed
// column expressions, and (b) use of the txn during processor setup during
// the execution of the flow.
innerPlanner, cleanup := NewInternalPlanner(
"create-stats-resume-job",
txn.KV(),
jobsPlanner.User(),
&MemoryMetrics{},
jobsPlanner.ExecCfg(),
jobsPlanner.SessionData(),
)
defer cleanup()
innerP := innerPlanner.(*planner)
innerEvalCtx := innerP.ExtendedEvalContext()
if details.AsOf != nil {
p.ExtendedEvalContext().AsOfSystemTime = &eval.AsOfSystemTime{Timestamp: *details.AsOf}
p.ExtendedEvalContext().SetTxnTimestamp(details.AsOf.GoTime())
innerP.ExtendedEvalContext().AsOfSystemTime = &eval.AsOfSystemTime{Timestamp: *details.AsOf}
innerP.ExtendedEvalContext().SetTxnTimestamp(details.AsOf.GoTime())
if err := txn.KV().SetFixedTimestamp(ctx, *details.AsOf); err != nil {
return err
}
}

planCtx := dsp.NewPlanningCtx(ctx, evalCtx, nil /* planner */, txn.KV(), FullDistribution)
dsp := innerP.DistSQLPlanner()
planCtx := dsp.NewPlanningCtx(ctx, innerEvalCtx, innerP, txn.KV(), FullDistribution)
// CREATE STATS flow doesn't produce any rows and only emits the
// metadata, so we can use a nil rowContainerHelper.
resultWriter := NewRowResultWriter(nil /* rowContainer */)
Expand Down
69 changes: 69 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -2877,3 +2877,72 @@ vectorized: true
estimated row count: 10 (1.0% of the table; stats collected <hidden> ago)
table: mno@mno_o_idx
spans: [/11 - /11]

# Regression for not setting the TypeResolver on the SemaContext when dealing
# with stats on virtual computed columns (#122312).
statement ok
CREATE TABLE t122312 (s STRING, g greeting AS (s::greeting) VIRTUAL)

statement ok
ANALYZE t122312

# Regression for not setting the txn of the schemaResolver when type checking
# stats on virtual computed columns.
statement ok
CREATE TYPE order_status AS ENUM ('pending', 'paid', 'dispatched', 'delivered')

statement ok
CREATE TABLE orders (
"id" UUID PRIMARY KEY DEFAULT gen_random_uuid(),
"customer_id" UUID NOT NULL,
"total" DECIMAL NOT NULL,
"balance" DECIMAL NOT NULL,
"order_ts" TIMESTAMPTZ(0) NOT NULL DEFAULT now(),
"dispatch_ts" TIMESTAMPTZ(0),
"delivery_ts" TIMESTAMPTZ(0),
"status" order_status AS (
CASE
WHEN "delivery_ts" IS NOT NULL THEN 'delivered'
WHEN "dispatch_ts" IS NOT NULL THEN 'dispatched'
WHEN "balance" = 0 THEN 'paid'
ELSE 'pending'
END) VIRTUAL,
INDEX ("status")
)

statement ok
INSERT INTO orders ("customer_id", "total", "balance", "dispatch_ts", "delivery_ts") VALUES
('bdeb232e-12e9-4a33-9dd5-7bb9b694291a', 100, 100, NULL, NULL),
('0dc59725-d20b-4370-a05d-11db025a0064', 200, 0, NULL, NULL),
('d53d4021-9390-4b3a-9e5a-4bf1ff3e5a4c', 300, 0, now(), NULL),
('d53d4021-9390-4b3a-9e5a-4bf1ff3e5a4c', 400, 0, now(), now())

statement ok
ANALYZE orders

query TIIIIB colnames
SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE orders]
ORDER BY column_names::STRING
----
column_names row_count null_count distinct_count avg_size has_histogram
{balance} 4 0 2 32 true
{customer_id} 4 0 3 16 true
{delivery_ts} 4 3 2 6 true
{dispatch_ts} 4 2 2 12 true
{id} 4 0 4 16 true
{order_ts} 4 0 1 24 true
{status} 4 0 4 48 true
{total} 4 0 4 32 true

let $hist_status
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE orders] WHERE column_names = ARRAY['status']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_status
----
upper_bound range_rows distinct_range_rows equal_rows
'pending' 0 0 1
'paid' 0 0 1
'dispatched' 0 0 1
'delivered' 0 0 1

0 comments on commit 2ba4c10

Please sign in to comment.