From e43920f1fd3325d478727f424ace26bfb42b1b60 Mon Sep 17 00:00:00 2001 From: tyrelr <44035897+tyrelr@users.noreply.github.com> Date: Fri, 3 Feb 2023 16:21:41 -0600 Subject: [PATCH] Sqlite describe fixes (#2253) * add failing test for nested orderby * log query paths which were abandoned due to invalid state or looping. Allow instructions to be executed a small number of times to fix nested order by query * add failing testcase using nested orderby * fix handling of sequence/offset and rewind * fix handling when sqlite nests records inside of records * add test of temporary table handling * WIP add test failure for temp table access * fix support for temp tables * add tests for sqlite datetime functions * add basic date and time function support * handle gosub opcode correctly * add group by test * fix group by handling * add additional passing group by test * add test case for simple limit query * fix IfPos & If touching wrong branches state, fix IfPos using wrong branch criteria * add test for large offsets * add short-circuit for possible query offset loops * add groupby query that is predicted incorrectly * fix handling of integer cast failures * add tests for single-row aggregate results * fix handling of null-based branching * add test for coercion of text by sum * fix calculation of sum value coercion * add failing test for recursive with query * add logic for delete operation to fix queries grouping by columns from a recursive query --- sqlx-sqlite/src/connection/explain.rs | 606 ++++++++++++++++++++------ tests/sqlite/describe.rs | 380 ++++++++++++++++ 2 files changed, 851 insertions(+), 135 deletions(-) diff --git a/sqlx-sqlite/src/connection/explain.rs b/sqlx-sqlite/src/connection/explain.rs index 850e87f652..c9247b4ef2 100644 --- a/sqlx-sqlite/src/connection/explain.rs +++ b/sqlx-sqlite/src/connection/explain.rs @@ -19,6 +19,7 @@ const SQLITE_AFF_REAL: u8 = 0x45; /* 'E' */ const OP_INIT: &str = "Init"; const OP_GOTO: &str = "Goto"; const 
OP_DECR_JUMP_ZERO: &str = "DecrJumpZero"; +const OP_DELETE: &str = "Delete"; const OP_ELSE_EQ: &str = "ElseEq"; const OP_EQ: &str = "Eq"; const OP_END_COROUTINE: &str = "EndCoroutine"; @@ -67,6 +68,7 @@ const OP_SEEK_LE: &str = "SeekLE"; const OP_SEEK_LT: &str = "SeekLT"; const OP_SEEK_ROW_ID: &str = "SeekRowId"; const OP_SEEK_SCAN: &str = "SeekScan"; +const OP_SEQUENCE: &str = "Sequence"; const OP_SEQUENCE_TEST: &str = "SequenceTest"; const OP_SORT: &str = "Sort"; const OP_SORTER_DATA: &str = "SorterData"; @@ -120,18 +122,24 @@ const OP_MULTIPLY: &str = "Multiply"; const OP_DIVIDE: &str = "Divide"; const OP_REMAINDER: &str = "Remainder"; const OP_CONCAT: &str = "Concat"; +const OP_OFFSET_LIMIT: &str = "OffsetLimit"; const OP_RESULT_ROW: &str = "ResultRow"; const OP_HALT: &str = "Halt"; -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -struct ColumnType { - pub datatype: DataType, - pub nullable: Option, +const MAX_LOOP_COUNT: u8 = 2; + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +enum ColumnType { + Single { + datatype: DataType, + nullable: Option, + }, + Record(Vec), } impl Default for ColumnType { fn default() -> Self { - Self { + Self::Single { datatype: DataType::Null, nullable: None, } @@ -140,43 +148,48 @@ impl Default for ColumnType { impl ColumnType { fn null() -> Self { - Self { + Self::Single { datatype: DataType::Null, nullable: Some(true), } } + fn map_to_datatype(&self) -> DataType { + match self { + Self::Single { datatype, .. } => datatype.clone(), + Self::Record(_) => DataType::Null, //If we're trying to coerce to a regular Datatype, we can assume a Record is invalid for the context + } + } + fn map_to_nullable(&self) -> Option { + match self { + Self::Single { nullable, .. 
} => *nullable, + Self::Record(_) => None, //If we're trying to coerce to a regular Datatype, we can assume a Record is invalid for the context + } + } } #[derive(Debug, Clone, Eq, PartialEq, Hash)] enum RegDataType { Single(ColumnType), - Record(Vec), Int(i64), } impl RegDataType { fn map_to_datatype(&self) -> DataType { match self { - RegDataType::Single(d) => d.datatype, - RegDataType::Record(_) => DataType::Null, //If we're trying to coerce to a regular Datatype, we can assume a Record is invalid for the context + RegDataType::Single(d) => d.map_to_datatype(), RegDataType::Int(_) => DataType::Int, } } fn map_to_nullable(&self) -> Option { match self { - RegDataType::Single(d) => d.nullable, - RegDataType::Record(_) => None, //If we're trying to coerce to a regular Datatype, we can assume a Record is invalid for the context + RegDataType::Single(d) => d.map_to_nullable(), RegDataType::Int(_) => Some(false), } } fn map_to_columntype(&self) -> ColumnType { match self { - RegDataType::Single(d) => *d, - RegDataType::Record(_) => ColumnType { - datatype: DataType::Null, - nullable: None, - }, //If we're trying to coerce to a regular Datatype, we can assume a Record is invalid for the context - RegDataType::Int(_) => ColumnType { + RegDataType::Single(d) => d.clone(), + RegDataType::Int(_) => ColumnType::Single { datatype: DataType::Int, nullable: Some(false), }, @@ -198,7 +211,7 @@ impl CursorDataType { Self::Normal { cols: record .iter() - .map(|(&colnum, &datatype)| (colnum, datatype)) + .map(|(colnum, datatype)| (*colnum, datatype.clone())) .collect(), is_empty, } @@ -206,7 +219,7 @@ impl CursorDataType { fn from_dense_record(record: &Vec, is_empty: Option) -> Self { Self::Normal { - cols: (0..).zip(record.iter().copied()).collect(), + cols: (0..).zip(record.iter().cloned()).collect(), is_empty, } } @@ -221,7 +234,7 @@ impl CursorDataType { rowdata } Self::Pseudo(i) => match registers.get(i) { - Some(RegDataType::Record(r)) => r.clone(), + 
Some(RegDataType::Single(ColumnType::Record(r))) => r.clone(), _ => Vec::new(), }, } @@ -234,7 +247,9 @@ impl CursorDataType { match self { Self::Normal { cols, .. } => cols.clone(), Self::Pseudo(i) => match registers.get(i) { - Some(RegDataType::Record(r)) => (0..).zip(r.iter().copied()).collect(), + Some(RegDataType::Single(ColumnType::Record(r))) => { + (0..).zip(r.iter().cloned()).collect() + } _ => HashMap::new(), }, } @@ -275,24 +290,22 @@ fn opcode_to_type(op: &str) -> DataType { fn root_block_columns( conn: &mut ConnectionState, -) -> Result>, Error> { - let table_block_columns: Vec<(i64, i64, String, bool)> = execute::iter( +) -> Result>, Error> { + let table_block_columns: Vec<(i64, i64, i64, String, bool)> = execute::iter( conn, - "SELECT s.rootpage, col.cid as colnum, col.type, col.\"notnull\" - FROM (select * from sqlite_temp_schema UNION select * from sqlite_schema) s + "SELECT s.dbnum, s.rootpage, col.cid as colnum, col.type, col.\"notnull\" + FROM ( + select 1 dbnum, tss.* from temp.sqlite_schema tss + UNION ALL select 0 dbnum, mss.* from main.sqlite_schema mss + ) s JOIN pragma_table_info(s.name) AS col - WHERE s.type = 'table'", - None, - false, - )? 
- .filter_map(|res| res.map(|either| either.right()).transpose()) - .map(|row| FromRow::from_row(&row?)) - .collect::, Error>>()?; - - let index_block_columns: Vec<(i64, i64, String, bool)> = execute::iter( - conn, - "SELECT s.rootpage, idx.seqno as colnum, col.type, col.\"notnull\" - FROM (select * from sqlite_temp_schema UNION select * from sqlite_schema) s + WHERE s.type = 'table' + UNION ALL + SELECT s.dbnum, s.rootpage, idx.seqno as colnum, col.type, col.\"notnull\" + FROM ( + select 1 dbnum, tss.* from temp.sqlite_schema tss + UNION ALL select 0 dbnum, mss.* from main.sqlite_schema mss + ) s JOIN pragma_index_info(s.name) AS idx LEFT JOIN pragma_table_info(s.tbl_name) as col ON col.cid = idx.cid @@ -304,22 +317,12 @@ fn root_block_columns( .map(|row| FromRow::from_row(&row?)) .collect::, Error>>()?; - let mut row_info: HashMap> = HashMap::new(); - for (block, colnum, datatype, notnull) in table_block_columns { - let row_info = row_info.entry(block).or_default(); - row_info.insert( - colnum, - ColumnType { - datatype: datatype.parse().unwrap_or(DataType::Null), - nullable: Some(!notnull), - }, - ); - } - for (block, colnum, datatype, notnull) in index_block_columns { - let row_info = row_info.entry(block).or_default(); + let mut row_info: HashMap<(i64, i64), HashMap> = HashMap::new(); + for (dbnum, block, colnum, datatype, notnull) in table_block_columns { + let row_info = row_info.entry((dbnum, block)).or_default(); row_info.insert( colnum, - ColumnType { + ColumnType::Single { datatype: datatype.parse().unwrap_or(DataType::Null), nullable: Some(!notnull), }, @@ -331,7 +334,9 @@ fn root_block_columns( #[derive(Debug, Clone, PartialEq)] struct QueryState { - pub visited: Vec, + // The number of times each instruction has been visited + pub visited: Vec, + // A log of the order of execution of each instruction pub history: Vec, // Registers pub r: HashMap, @@ -412,7 +417,7 @@ pub(super) fn explain( crate::logger::QueryPlanLogger::new(query, &program, 
conn.log_settings.clone()); let mut states = vec![QueryState { - visited: vec![false; program_size], + visited: vec![0; program_size], history: Vec::new(), r: HashMap::with_capacity(6), p: HashMap::with_capacity(6), @@ -426,40 +431,52 @@ pub(super) fn explain( while let Some(mut state) = states.pop() { while state.program_i < program_size { - if state.visited[state.program_i] { - state.program_i += 1; + let (_, ref opcode, p1, p2, p3, ref p4) = program[state.program_i]; + state.history.push(state.program_i); + + if state.visited[state.program_i] > MAX_LOOP_COUNT { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } + //avoid (infinite) loops by breaking once the same instruction has been visited more than MAX_LOOP_COUNT times break; } - let (_, ref opcode, p1, p2, p3, ref p4) = program[state.program_i]; - state.history.push(state.program_i); + + state.visited[state.program_i] += 1; match &**opcode { OP_INIT => { // start at - state.visited[state.program_i] = true; state.program_i = p2 as usize; continue; } OP_GOTO => { // goto - state.visited[state.program_i] = true; + + state.program_i = p2 as usize; + continue; + } + + OP_GO_SUB => { + // store current instruction in r[p1], goto + state.r.insert(p1, RegDataType::Int(state.program_i as i64)); state.program_i = p2 as usize; continue; } OP_DECR_JUMP_ZERO | OP_ELSE_EQ | OP_EQ | OP_FILTER | OP_FK_IF_ZERO | OP_FOUND - | OP_GE | OP_GO_SUB | OP_GT | OP_IDX_GE | OP_IDX_GT | OP_IDX_LE | OP_IDX_LT - | OP_IF | OP_IF_NO_HOPE | OP_IF_NOT | OP_IF_NOT_OPEN | OP_IF_NOT_ZERO - | OP_IF_NULL_ROW | OP_IF_POS | OP_IF_SMALLER | OP_INCR_VACUUM | OP_IS_NULL - | OP_IS_NULL_OR_TYPE | OP_LE | OP_LT | OP_MUST_BE_INT | OP_NE | OP_NEXT - | OP_NO_CONFLICT | OP_NOT_EXISTS | OP_NOT_NULL | OP_ONCE | OP_PREV | OP_PROGRAM + | OP_GE | OP_GT | OP_IDX_GE | OP_IDX_GT | OP_IDX_LE | OP_IDX_LT | OP_IF_NO_HOPE + | OP_IF_NOT | OP_IF_NOT_OPEN | 
OP_IF_NOT_ZERO | OP_IF_NULL_ROW | OP_IF_SMALLER + | OP_INCR_VACUUM | OP_IS_NULL | OP_IS_NULL_OR_TYPE | OP_LE | OP_LT | OP_NE + | OP_NEXT | OP_NO_CONFLICT | OP_NOT_EXISTS | OP_ONCE | OP_PREV | OP_PROGRAM | OP_ROW_SET_READ | OP_ROW_SET_TEST | OP_SEEK_GE | OP_SEEK_GT | OP_SEEK_LE | OP_SEEK_LT | OP_SEEK_ROW_ID | OP_SEEK_SCAN | OP_SEQUENCE_TEST | OP_SORTER_NEXT | OP_V_FILTER | OP_V_NEXT => { // goto or next instruction (depending on actual values) - state.visited[state.program_i] = true; let mut branch_state = state.clone(); branch_state.program_i = p2 as usize; @@ -474,9 +491,157 @@ pub(super) fn explain( continue; } + OP_NOT_NULL => { + // goto or next instruction (depending on actual values) + + let might_branch = match state.r.get(&p1) { + Some(r_p1) => !matches!(r_p1.map_to_datatype(), DataType::Null), + _ => false, + }; + + let might_not_branch = match state.r.get(&p1) { + Some(r_p1) => !matches!(r_p1.map_to_nullable(), Some(false)), + _ => false, + }; + + if might_branch { + let mut branch_state = state.clone(); + branch_state.program_i = p2 as usize; + if let Some(RegDataType::Single(ColumnType::Single { nullable, .. 
})) = + branch_state.r.get_mut(&p1) + { + *nullable = Some(false); + } + + let bs_hash = BranchStateHash::from_query_state(&branch_state); + if !visited_branch_state.contains(&bs_hash) { + visited_branch_state.insert(bs_hash); + states.push(branch_state); + } + } + + if might_not_branch { + state.program_i += 1; + state + .r + .insert(p1, RegDataType::Single(ColumnType::default())); + continue; + } else { + break; + } + } + + OP_MUST_BE_INT => { + // if p1 can be coerced to int, continue + // if p1 cannot be coerced to int, error if p2 == 0, else jump to p2 + + //don't bother checking actual types, just don't branch to instruction 0 + if p2 != 0 { + let mut branch_state = state.clone(); + branch_state.program_i = p2 as usize; + + let bs_hash = BranchStateHash::from_query_state(&branch_state); + if !visited_branch_state.contains(&bs_hash) { + visited_branch_state.insert(bs_hash); + states.push(branch_state); + } + } + + state.program_i += 1; + continue; + } + + OP_IF => { + // goto if r[p1] is true (1) or r[p1] is null and p3 is nonzero + + let might_branch = match state.r.get(&p1) { + Some(RegDataType::Int(r_p1)) => *r_p1 != 0, + _ => true, + }; + + let might_not_branch = match state.r.get(&p1) { + Some(RegDataType::Int(r_p1)) => *r_p1 == 0, + _ => true, + }; + + if might_branch { + let mut branch_state = state.clone(); + branch_state.program_i = p2 as usize; + if p3 == 0 { + branch_state.r.insert(p1, RegDataType::Int(1)); + } + + let bs_hash = BranchStateHash::from_query_state(&branch_state); + if !visited_branch_state.contains(&bs_hash) { + visited_branch_state.insert(bs_hash); + states.push(branch_state); + } + } + + if might_not_branch { + state.program_i += 1; + if p3 == 0 { + state.r.insert(p1, RegDataType::Int(0)); + } + continue; + } else { + break; + } + } + + OP_IF_POS => { + // goto if r[p1] >= 1, decrementing r[p1] when the jump is taken + + // as a workaround for large offset clauses, both branches will be attempted after 1 loop + + let might_branch = 
match state.r.get(&p1) { + Some(RegDataType::Int(r_p1)) => *r_p1 >= 1, + _ => true, + }; + + let might_not_branch = match state.r.get(&p1) { + Some(RegDataType::Int(r_p1)) => *r_p1 < 1, + _ => true, + }; + + let loop_detected = state.visited[state.program_i] > 1; + if might_branch || loop_detected { + let mut branch_state = state.clone(); + branch_state.program_i = p2 as usize; + if let Some(RegDataType::Int(r_p1)) = branch_state.r.get_mut(&p1) { + *r_p1 -= 1; + } + states.push(branch_state); + } + + if might_not_branch { + state.program_i += 1; + continue; + } else if loop_detected { + state.program_i += 1; + if matches!(state.r.get_mut(&p1), Some(RegDataType::Int(..))) { + //forget the exact value, in case some later instruction cares + state.r.insert( + p1, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Int64, + nullable: Some(false), + }), + ); + } + continue; + } else { + break; + } + } + OP_REWIND | OP_LAST | OP_SORT | OP_SORTER_SORT => { - // goto if cursor p1 is empty, else next instruction - state.visited[state.program_i] = true; + // goto if cursor p1 is empty and p2 != 0, else next instruction + + if p2 == 0 { + state.program_i += 1; + continue; + } if let Some(cursor) = state.p.get(&p1) { if matches!(cursor.is_empty(), None | Some(true)) { @@ -497,15 +662,23 @@ pub(super) fn explain( //only take this branch if the cursor is non-empty state.program_i += 1; continue; + } else { + break; } } + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } + break; } OP_INIT_COROUTINE => { // goto or next instruction (depending on actual values) - state.visited[state.program_i] = true; + state.r.insert(p1, RegDataType::Int(p3)); if p2 != 0 { @@ -518,7 +691,7 @@ pub(super) fn explain( OP_END_COROUTINE => { // jump to p2 of the yield instruction pointed at by register p1 - state.visited[state.program_i] = true; + if let 
Some(RegDataType::Int(yield_i)) = state.r.get(&p1) { if let Some((_, yield_op, _, yield_p2, _, _)) = program.get(*yield_i as usize) @@ -528,31 +701,58 @@ pub(super) fn explain( state.r.remove(&p1); continue; } else { + if logger.log_enabled() { + let program_history: Vec<&( + i64, + String, + i64, + i64, + i64, + Vec, + )> = state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } + break; } } else { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } break; } } else { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } break; } } OP_RETURN => { // jump to the instruction after the instruction pointed at by register p1 - state.visited[state.program_i] = true; + if let Some(RegDataType::Int(return_i)) = state.r.get(&p1) { state.program_i = (*return_i + 1) as usize; state.r.remove(&p1); continue; } else { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } break; } } OP_YIELD => { // jump to p2 of the yield instruction pointed at by register p1, store prior instruction in p1 - state.visited[state.program_i] = true; + if let Some(RegDataType::Int(yield_i)) = state.r.get_mut(&p1) { let program_i: usize = state.program_i; @@ -571,13 +771,17 @@ pub(super) fn explain( continue; } } else { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } break; } } OP_JUMP => { // goto one of , , or based on the result of a prior compare - 
state.visited[state.program_i] = true; let mut branch_state = state.clone(); branch_state.program_i = p1 as usize; @@ -610,7 +814,7 @@ pub(super) fn explain( { if let Some(col) = record.get(&p2) { // insert into p3 the datatype of the col - state.r.insert(p3, RegDataType::Single(*col)); + state.r.insert(p3, RegDataType::Single(col.clone())); } else { state .r @@ -623,13 +827,30 @@ pub(super) fn explain( } } + OP_SEQUENCE => { + //Copy sequence number from cursor p1 to register p2, increment cursor p1 sequence number + + //Cursor emulation doesn't track the sequence value, but it is an int + state.r.insert( + p2, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Int64, + nullable: Some(false), + }), + ); + } + OP_ROW_DATA | OP_SORTER_DATA => { //Get entire row from cursor p1, store it into register p2 if let Some(record) = state.p.get(&p1) { let rowdata = record.map_to_dense_record(&state.r); - state.r.insert(p2, RegDataType::Record(rowdata)); + state + .r + .insert(p2, RegDataType::Single(ColumnType::Record(rowdata))); } else { - state.r.insert(p2, RegDataType::Record(Vec::new())); + state + .r + .insert(p2, RegDataType::Single(ColumnType::Record(Vec::new()))); } } @@ -645,30 +866,43 @@ pub(super) fn explain( .unwrap_or(ColumnType::default()), ); } - state.r.insert(p3, RegDataType::Record(record)); + state + .r + .insert(p3, RegDataType::Single(ColumnType::Record(record))); } OP_INSERT | OP_IDX_INSERT | OP_SORTER_INSERT => { - if let Some(RegDataType::Record(record)) = state.r.get(&p2) { + if let Some(RegDataType::Single(ColumnType::Record(record))) = state.r.get(&p2) + { if let Some(CursorDataType::Normal { cols, is_empty }) = state.p.get_mut(&p1) { // Insert the record into wherever pointer p1 is - *cols = (0..).zip(record.iter().copied()).collect(); + *cols = (0..).zip(record.iter().cloned()).collect(); *is_empty = Some(false); } } //Noop if the register p2 isn't a record, or if pointer p1 does not exist } + OP_DELETE => { + // delete a record from cursor 
p1 + if let Some(CursorDataType::Normal { is_empty, .. }) = state.p.get_mut(&p1) { + if *is_empty == Some(false) { + *is_empty = None; //the cursor might be empty now + } + } + } + OP_OPEN_PSEUDO => { // Create a cursor p1 aliasing the record from register p2 state.p.insert(p1, CursorDataType::Pseudo(p2)); } + OP_OPEN_READ | OP_OPEN_WRITE => { //Create a new pointer which is referenced by p1, take column metadata from db schema if found - if p3 == 0 { - if let Some(columns) = root_block_cols.get(&p2) { + if p3 == 0 || p3 == 1 { + if let Some(columns) = root_block_cols.get(&(p3, p2)) { state .p .insert(p1, CursorDataType::from_sparse_record(columns, None)); @@ -715,12 +949,42 @@ pub(super) fn explain( // last_insert_rowid() -> INTEGER state.r.insert( p3, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, nullable: Some(false), }), ); } + "date(-1)" | "time(-1)" | "datetime(-1)" | "strftime(-1)" => { + // date|time|datetime|strftime(...) -> TEXT + state.r.insert( + p3, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Text, + nullable: Some(p2 != 0), //never a null result if no argument provided + }), + ); + } + "julianday(-1)" => { + // julianday(...) -> REAL + state.r.insert( + p3, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Float, + nullable: Some(p2 != 0), //never a null result if no argument provided + }), + ); + } + "unixepoch(-1)" => { + // unixepoch(p2...) -> INTEGER + state.r.insert( + p3, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Int64, + nullable: Some(p2 != 0), //never a null result if no argument provided + }), + ); + } _ => logger.add_unknown_operation(&program[state.program_i]), } @@ -730,8 +994,13 @@ pub(super) fn explain( // all columns in cursor X are potentially nullable if let Some(CursorDataType::Normal { ref mut cols, .. 
}) = state.p.get_mut(&p1) { - for ref mut col in cols.values_mut() { - col.nullable = Some(true); + for col in cols.values_mut() { + if let ColumnType::Single { + ref mut nullable, .. + } = col + { + *nullable = Some(true); + } } } //else we don't know about the cursor @@ -750,11 +1019,25 @@ pub(super) fn explain( // count(_) -> INTEGER state.r.insert( p3, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, nullable: Some(false), }), ); + } else if p4.starts_with("sum(") { + if let Some(r_p2) = state.r.get(&p2) { + let datatype = match r_p2.map_to_datatype() { + DataType::Int64 => DataType::Int64, + DataType::Int => DataType::Int, + DataType::Bool => DataType::Int, + _ => DataType::Float, + }; + let nullable = r_p2.map_to_nullable(); + state.r.insert( + p3, + RegDataType::Single(ColumnType::Single { datatype, nullable }), + ); + } } else if let Some(v) = state.r.get(&p2).cloned() { // r[p3] = AGG ( r[p2] ) state.r.insert(p3, v); @@ -773,21 +1056,18 @@ pub(super) fn explain( // count(_) -> INTEGER state.r.insert( p1, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: DataType::Int64, nullable: Some(false), }), ); - } else if let Some(v) = state.r.get(&p2).cloned() { - // r[p3] = AGG ( r[p2] ) - state.r.insert(p3, v); } } OP_CAST => { // affinity(r[p1]) if let Some(v) = state.r.get_mut(&p1) { - *v = RegDataType::Single(ColumnType { + *v = RegDataType::Single(ColumnType::Single { datatype: affinity_to_type(p2 as u8), nullable: v.map_to_nullable(), }); @@ -837,7 +1117,7 @@ pub(super) fn explain( // r[p2] = state.r.insert( p2, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: opcode_to_type(&opcode), nullable: Some(false), }), @@ -867,7 +1147,7 @@ pub(super) fn explain( (Some(a), Some(b)) => { state.r.insert( p3, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: if matches!(a.map_to_datatype(), 
DataType::Null) { b.map_to_datatype() } else { @@ -886,7 +1166,7 @@ pub(super) fn explain( (Some(v), None) => { state.r.insert( p3, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: v.map_to_datatype(), nullable: None, }), @@ -896,7 +1176,7 @@ pub(super) fn explain( (None, Some(v)) => { state.r.insert( p3, - RegDataType::Single(ColumnType { + RegDataType::Single(ColumnType::Single { datatype: v.map_to_datatype(), nullable: None, }), @@ -907,9 +1187,20 @@ pub(super) fn explain( } } + OP_OFFSET_LIMIT => { + // r[p2] = if r[p2] < 0 { r[p1] } else if r[p1]<0 { -1 } else { r[p1] + r[p3] } + state.r.insert( + p2, + RegDataType::Single(ColumnType::Single { + datatype: DataType::Int64, + nullable: Some(false), + }), + ); + } + OP_RESULT_ROW => { // output = r[p1 .. p1 + p2] - state.visited[state.program_i] = true; + state.result = Some( (p1..p1 + p2) .map(|i| { @@ -928,13 +1219,18 @@ pub(super) fn explain( if logger.log_enabled() { let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = state.history.iter().map(|i| &program[*i]).collect(); - logger.add_result((program_history, state.result.clone())); + logger.add_result((program_history, Some(state.result.clone()))); } result_states.push(state.clone()); } OP_HALT => { + if logger.log_enabled() { + let program_history: Vec<&(i64, String, i64, i64, i64, Vec)> = + state.history.iter().map(|i| &program[*i]).collect(); + logger.add_result((program_history, None)); + } break; } @@ -945,7 +1241,6 @@ pub(super) fn explain( } } - state.visited[state.program_i] = true; state.program_i += 1; } } @@ -1051,147 +1346,188 @@ fn test_root_block_columns_has_types() { .next() .is_some()); - let table_block_nums: HashMap = execute::iter( + assert!(execute::iter( + &mut conn, + r"CREATE TEMPORARY TABLE t3(a TEXT PRIMARY KEY, b REAL NOT NULL, b_null REAL NULL);", + None, + false + ) + .unwrap() + .next() + .is_some()); + + let table_block_nums: HashMap = execute::iter( &mut conn, - r"select name, 
rootpage from sqlite_master", + r"select name, 0 db_seq, rootpage from main.sqlite_schema UNION ALL select name, 1 db_seq, rootpage from temp.sqlite_schema", None, false, ) .unwrap() .filter_map(|res| res.map(|either| either.right()).transpose()) .map(|row| FromRow::from_row(row.as_ref().unwrap())) + .map(|row| row.map(|(name,seq,block)|(name,(seq,block)))) .collect::, Error>>() .unwrap(); let root_block_cols = root_block_columns(&mut conn).unwrap(); - assert_eq!(6, root_block_cols.len()); + // there should be 7 tables/indexes created explicitly, plus 1 autoindex for t3 + assert_eq!(8, root_block_cols.len()); //prove that we have some information for each table & index - for blocknum in table_block_nums.values() { - assert!(root_block_cols.contains_key(blocknum)); + for (name, db_seq_block) in dbg!(&table_block_nums) { + assert!( + root_block_cols.contains_key(db_seq_block), + "{:?}", + (name, db_seq_block) + ); } //prove that each block has the correct information { - let blocknum = table_block_nums["t"]; + let table_db_block = table_block_nums["t"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(true) //sqlite primary key columns are nullable unless declared not null }, - root_block_cols[&blocknum][&0] + root_block_cols[&table_db_block][&0] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Text, nullable: Some(true) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Text, nullable: Some(false) }, - root_block_cols[&blocknum][&2] + root_block_cols[&table_db_block][&2] ); } { - let blocknum = table_block_nums["i1"]; + let table_db_block = table_block_nums["i1"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(true) //sqlite primary key columns are nullable unless declared not null }, - root_block_cols[&blocknum][&0] + root_block_cols[&table_db_block][&0] ); 
assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Text, nullable: Some(true) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] ); } { - let blocknum = table_block_nums["i2"]; + let table_db_block = table_block_nums["i2"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(true) //sqlite primary key columns are nullable unless declared not null }, - root_block_cols[&blocknum][&0] + root_block_cols[&table_db_block][&0] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Text, nullable: Some(true) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] ); } { - let blocknum = table_block_nums["t2"]; + let table_db_block = table_block_nums["t2"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(false) }, - root_block_cols[&blocknum][&0] + root_block_cols[&table_db_block][&0] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Null, nullable: Some(true) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Null, nullable: Some(false) }, - root_block_cols[&blocknum][&2] + root_block_cols[&table_db_block][&2] ); } { - let blocknum = table_block_nums["t2i1"]; + let table_db_block = table_block_nums["t2i1"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(false) }, - root_block_cols[&blocknum][&0] + root_block_cols[&table_db_block][&0] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Null, nullable: Some(true) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] ); } { - let blocknum = table_block_nums["t2i2"]; + let table_db_block = table_block_nums["t2i2"]; assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Int64, nullable: Some(false) }, - root_block_cols[&blocknum][&0] + 
root_block_cols[&table_db_block][&0] ); assert_eq!( - ColumnType { + ColumnType::Single { datatype: DataType::Null, nullable: Some(false) }, - root_block_cols[&blocknum][&1] + root_block_cols[&table_db_block][&1] + ); + } + + { + let table_db_block = table_block_nums["t3"]; + assert_eq!( + ColumnType::Single { + datatype: DataType::Text, + nullable: Some(true) + }, + root_block_cols[&table_db_block][&0] + ); + assert_eq!( + ColumnType::Single { + datatype: DataType::Float, + nullable: Some(false) + }, + root_block_cols[&table_db_block][&1] + ); + assert_eq!( + ColumnType::Single { + datatype: DataType::Float, + nullable: Some(true) + }, + root_block_cols[&table_db_block][&2] ); } } diff --git a/tests/sqlite/describe.rs b/tests/sqlite/describe.rs index ffd088badc..22121bbd1e 100644 --- a/tests/sqlite/describe.rs +++ b/tests/sqlite/describe.rs @@ -88,6 +88,56 @@ async fn it_describes_expression() -> anyhow::Result<()> { Ok(()) } +#[sqlx_macros::test] +async fn it_describes_temporary_table() -> anyhow::Result<()> { + let mut conn = new::().await?; + + conn.execute( + "CREATE TEMPORARY TABLE IF NOT EXISTS empty_all_types_and_nulls( + i1 integer NULL, + r1 real NULL, + t1 text NULL, + b1 blob NULL, + i2 INTEGER NOT NULL, + r2 REAL NOT NULL, + t2 TEXT NOT NULL, + b2 BLOB NOT NULL + )", + ) + .await?; + + let d = conn + .describe("SELECT * FROM empty_all_types_and_nulls") + .await?; + assert_eq!(d.columns().len(), 8); + + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(true)); + + assert_eq!(d.column(1).type_info().name(), "REAL"); + assert_eq!(d.nullable(1), Some(true)); + + assert_eq!(d.column(2).type_info().name(), "TEXT"); + assert_eq!(d.nullable(2), Some(true)); + + assert_eq!(d.column(3).type_info().name(), "BLOB"); + assert_eq!(d.nullable(3), Some(true)); + + assert_eq!(d.column(4).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(4), Some(false)); + + assert_eq!(d.column(5).type_info().name(), "REAL"); + 
assert_eq!(d.nullable(5), Some(false)); + + assert_eq!(d.column(6).type_info().name(), "TEXT"); + assert_eq!(d.nullable(6), Some(false)); + + assert_eq!(d.column(7).type_info().name(), "BLOB"); + assert_eq!(d.nullable(7), Some(false)); + + Ok(()) +} + #[sqlx_macros::test] async fn it_describes_expression_from_empty_table() -> anyhow::Result<()> { let mut conn = new::().await?; @@ -301,6 +351,52 @@ async fn it_describes_left_join() -> anyhow::Result<()> { Ok(()) } +#[sqlx_macros::test] +async fn it_describes_group_by() -> anyhow::Result<()> { + let mut conn = new::().await?; + + let d = conn.describe("select id from accounts group by id").await?; + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(false)); + + let d = conn + .describe("SELECT name from accounts GROUP BY 1 LIMIT -1 OFFSET 1") + .await?; + assert_eq!(d.column(0).type_info().name(), "TEXT"); + assert_eq!(d.nullable(0), Some(false)); + + let d = conn + .describe("SELECT sum(id), sum(is_sent) from tweet GROUP BY owner_id") + .await?; + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(false)); + assert_eq!(d.column(1).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(1), Some(false)); + + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_ungrouped_aggregate() -> anyhow::Result<()> { + let mut conn = new::().await?; + + let d = conn.describe("select count(1) from accounts").await?; + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(false)); + + let d = conn.describe("SELECT sum(is_sent) from tweet").await?; + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(true)); + + let d = conn + .describe("SELECT coalesce(sum(is_sent),0) from tweet") + .await?; + assert_eq!(d.column(0).type_info().name(), "INTEGER"); + assert_eq!(d.nullable(0), Some(false)); + + Ok(()) +} + #[sqlx_macros::test] async fn it_describes_literal_subquery() -> 
anyhow::Result<()> { async fn assert_literal_described( @@ -421,6 +517,16 @@ async fn it_describes_table_order_by() -> anyhow::Result<()> { .await?; assert_literal_order_by_described(&mut conn, "SELECT 'a', text FROM tweet ORDER BY text") .await?; + assert_literal_order_by_described( + &mut conn, + "SELECT 'a', text FROM tweet ORDER BY text NULLS LAST", + ) + .await?; + assert_literal_order_by_described( + &mut conn, + "SELECT 'a', text FROM tweet ORDER BY text DESC NULLS LAST", + ) + .await?; Ok(()) } @@ -469,3 +575,277 @@ async fn it_describes_union() -> anyhow::Result<()> { Ok(()) } + +//documents failures originally found through property testing +#[sqlx_macros::test] +async fn it_describes_strange_queries() -> anyhow::Result<()> { + async fn assert_single_column_described( + conn: &mut sqlx::SqliteConnection, + query: &str, + typename: &str, + nullable: bool, + ) -> anyhow::Result<()> { + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), typename, "{}", query); + assert_eq!(info.nullable(0), Some(nullable), "{}", query); + + Ok(()) + } + + let mut conn = new::<Sqlite>().await?; + + assert_single_column_described( + &mut conn, + "SELECT true FROM (SELECT true) a ORDER BY true", + "INTEGER", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + " + SELECT true + FROM ( + SELECT 'a' + ) + CROSS JOIN ( + SELECT 'b' + FROM (SELECT 'c') + CROSS JOIN accounts + ORDER BY id + LIMIT 1 + ) + ", + "INTEGER", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT true FROM tweet + ORDER BY true ASC NULLS LAST", + "INTEGER", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT true LIMIT -1 OFFSET -1", + "INTEGER", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT true FROM tweet J LIMIT 10 OFFSET 1000000", + "INTEGER", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT text + FROM (SELECT null) +
CROSS JOIN ( + SELECT text + FROM tweet + GROUP BY text + ) + LIMIT -1 OFFSET -1", + "TEXT", + false, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT EYH.id,COUNT(EYH.id) + FROM accounts EYH", + "INTEGER", + true, + ) + .await?; + + assert_single_column_described( + &mut conn, + "SELECT SUM(tweet.text) FROM (SELECT NULL FROM accounts_view LIMIT -1 OFFSET 1) CROSS JOIN tweet", + "REAL", + true, // null if accounts view has fewer rows than the offset + ) + .await?; + + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_func_date() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = "SELECT date();"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(false), "{}", query); + + let query = "SELECT date('now');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT date('now', 'start of month');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT date(:datebind);"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_func_time() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = "SELECT time();"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(false), "{}", query); + + let query = "SELECT time('now');"; + let info = conn.describe(query).await?; +
assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT time('now', 'start of month');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT time(:datebind);"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_func_datetime() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = "SELECT datetime();"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(false), "{}", query); + + let query = "SELECT datetime('now');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT datetime('now', 'start of month');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT datetime(:datebind);"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_func_julianday() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = "SELECT julianday();"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "REAL", "{}", query); +
assert_eq!(info.nullable(0), Some(false), "{}", query); + + let query = "SELECT julianday('now');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "REAL", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT julianday('now', 'start of month');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "REAL", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT julianday(:datebind);"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "REAL", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_func_strftime() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = "SELECT strftime('%s','now');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT strftime('%s', 'now', 'start of month');"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); //can't prove that it's not-null yet + + let query = "SELECT strftime('%s',:datebind);"; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + Ok(()) +} + +#[sqlx_macros::test] +async fn it_describes_with_recursive() -> anyhow::Result<()> { + let mut conn = new::<Sqlite>().await?; + + let query = " + WITH RECURSIVE schedule(begin_date) AS ( + SELECT datetime('2022-10-01') + WHERE datetime('2022-10-01') < datetime('2022-11-03') + UNION ALL + SELECT datetime(begin_date,'+1
day') + FROM schedule + WHERE datetime(begin_date) < datetime(?2) + ) + SELECT + begin_date + FROM schedule + GROUP BY begin_date + "; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + + let query = " + WITH RECURSIVE schedule(begin_date) AS MATERIALIZED ( + SELECT datetime('2022-10-01') + WHERE datetime('2022-10-01') < datetime('2022-11-03') + UNION ALL + SELECT datetime(begin_date,'+1 day') + FROM schedule + WHERE datetime(begin_date) < datetime(?2) + ) + SELECT + begin_date + FROM schedule + GROUP BY begin_date + "; + let info = conn.describe(query).await?; + assert_eq!(info.column(0).type_info().name(), "TEXT", "{}", query); + assert_eq!(info.nullable(0), Some(true), "{}", query); + + Ok(()) +}