From 380b258ef40f3f9c461f6867aee3d3aef9c38cba Mon Sep 17 00:00:00 2001 From: Aspen Smith Date: Sun, 23 Jul 2023 11:33:33 -0400 Subject: [PATCH] sql: Support IN conditions with a subquery Support IN exprs with subqueries on the RHS in the WHERE clause of queries, by compiling them to an INNER JOIN with a DISTINCT on the rhs Release-Note-Core: Add support for queries with a WHERE clause containing IN with subqueries on the right-hand side Change-Id: Iddb8a385a01b423272c0ad8c0197324538217269 Reviewed-on: https://gerrit.readyset.name/c/readyset/+/5522 Tested-by: Buildkite CI Reviewed-by: Dan Wilbanks --- logictests/in_subquery.test | 46 ++++++++++ readyset-server/src/controller/sql/mir/mod.rs | 88 ++++++++++++++++++- .../src/controller/sql/query_graph.rs | 4 +- 3 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 logictests/in_subquery.test diff --git a/logictests/in_subquery.test b/logictests/in_subquery.test new file mode 100644 index 0000000000..c818314fc0 --- /dev/null +++ b/logictests/in_subquery.test @@ -0,0 +1,46 @@ +statement ok +create table t1 (x int, y int); + +statement ok +create table t2 (x int, y int); + +statement ok +insert into t1 (x, y) +values +(1, 1), +(2, 2), +(3, 3); + +statement ok +insert into t2 (x, y) +values +(1, 1), +(1, 1); + + +query I nosort +select count(*) from t1 where x in (select x from t2); +---- +1 + + +query I nosort +select y from t1 where x in (select x + 1 from t2 where y = 1); +---- +2 + + +query I nosort +select y from t1 where (x + 1) in (select x + 1 from t2 where y = 1); +---- +1 + + +query I rowsort +select y from t1 +where +(x + 1) in (select x + 1 from t2 where y = 1) +OR y = 2 +---- +1 +2 diff --git a/readyset-server/src/controller/sql/mir/mod.rs b/readyset-server/src/controller/sql/mir/mod.rs index 338fee55b3..9eb7ce2168 100644 --- a/readyset-server/src/controller/sql/mir/mod.rs +++ b/readyset-server/src/controller/sql/mir/mod.rs @@ -18,15 +18,15 @@ pub use mir::{Column, NodeIndex}; use nom_sql::analysis::ReferredColumns; use nom_sql::{ BinaryOperator, ColumnSpecification, CompoundSelectOperator, CreateTableBody, Expr, - FieldDefinitionExpr, FieldReference, FunctionExpr, LimitClause, Literal, OrderClause, + FieldDefinitionExpr, FieldReference, FunctionExpr, InValue, LimitClause, Literal, OrderClause, OrderType, Relation, SqlIdentifier, TableKey, }; use petgraph::visit::Reversed; use petgraph::Direction; use readyset_client::ViewPlaceholder; use readyset_errors::{ - internal, internal_err, invalid_err, invariant, invariant_eq, unsupported, ReadySetError, - ReadySetResult, + internal, internal_err, invalid, invalid_err, invariant, invariant_eq, unsupported, + ReadySetError, ReadySetResult, }; use readyset_sql_passes::is_correlated; use readyset_util::redacted::Sensitive; @@ -1329,6 +1329,88 @@ impl SqlToMirConverter { Expr::Call(_) => { internal!("Function calls should have been handled by projection earlier") } + Expr::In { + lhs, + rhs: InValue::Subquery(subquery), + negated, + } => { + // σ[lhs IN (π[x]R₂)](R₁) + // + // is compiled like + // + // R₁ ⋈[lhs ≡ rhs] π[DISTINCT x AS rhs](R₂) + + if *negated { + unsupported!("NOT IN is not yet supported") + } + let (lhs, parent) = match &**lhs { + Expr::Column(col) => (col.clone(), parent), + expr => { + // The lhs is a non-column expr, so we need to project it first + let label = lhs.display(nom_sql::Dialect::MySQL).to_string(); + let prj = self.make_project_node( + query_name, + self.generate_label(&"in_lhs_project".into()), + parent, + self.mir_graph + .columns(parent) + .into_iter() + .map(ProjectExpr::Column) + .chain(iter::once(ProjectExpr::Expr { + alias: label.clone().into(), + expr: expr.clone(), + })) + .collect(), + ); + ( + nom_sql::Column { + name: label.into(), + table: None, + }, + prj, + ) + } + }; + + let query_graph = to_query_graph((**subquery).clone())?; + let subquery_leaf = self.named_query_to_mir( + query_name, + &query_graph, + &HashMap::new(), + LeafBehavior::Anonymous, + )?; + + let cols = self.columns(subquery_leaf); + if cols.len() != 1 { + invalid!("Subquery on right-hand side of IN must have exactly one column"); + } + let col = cols.into_iter().next().expect("Just checked"); + let distinct = self.make_distinct_node( + query_name, + self.generate_label(&"in_subquery_distinct".into()), + subquery_leaf, + vec![col.clone()], + ); + + self.make_join_node( + query_name, + self.generate_label(&"join_in_subquery".into()), + &[JoinPredicate { + left: lhs, + right: nom_sql::Column { + name: col.name, + table: col.table, + }, + }], + parent, + distinct, + if is_correlated(subquery) { + JoinKind::Dependent + } else { + JoinKind::Inner + }, + )? + } Expr::NestedSelect(_) => unsupported!("Nested selects not supported in filters"), _ => self.make_filter_node( query_name, diff --git a/readyset-server/src/controller/sql/query_graph.rs b/readyset-server/src/controller/sql/query_graph.rs index 717f608de0..2a93cbe452 100644 --- a/readyset-server/src/controller/sql/query_graph.rs +++ b/readyset-server/src/controller/sql/query_graph.rs @@ -642,8 +642,8 @@ fn classify_conditionals( Expr::In { rhs: InValue::Subquery(..), .. - } => unsupported!("IN with subqueries is not yet supported"), - Expr::Call(_) + } + | Expr::Call(_) | Expr::Literal(_) | Expr::UnaryOp { .. } | Expr::OpAny { .. }