Skip to content

Commit

Permalink
feat(sqlsmith): test struct and list (#5951)
Browse files Browse the repository at this point in the history
Add more input types to sqlsmith (`struct`, `list`).

### Motivation for refactoring `DataTypeName`

- Previously we used `DataTypeName` internally to indicate which expressions to generate.
- This is insufficient, since it elides the internal types of `struct` and `list`.
- Now we use `DataType` directly.
- In this PR we support generating these new variants as scalar values and columns.
- Other PRs may add support for generating functions; see #7132.
- This will likely work by generating some variants of structs and lists that we can choose from.
- During setup: insert function signatures that can work with these structs and lists.
- During setup: define relations with these variants as columns.

### Misc

- Disable struct scalar due to #7189
- Disable timestamptz due to #5826

Approved-By: lmatz

Co-Authored-By: Tao Wu <wutao@singularity-data.com>
Co-Authored-By: Noel Kwan <noelkwan1998@gmail.com>
Co-Authored-By: Noel Kwan <47273164+kwannoel@users.noreply.github.com>
  • Loading branch information
3 people committed Jan 4, 2023
1 parent 1ba6981 commit 98ae6ce
Show file tree
Hide file tree
Showing 14 changed files with 407 additions and 131 deletions.
2 changes: 1 addition & 1 deletion src/expr/src/sig/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use std::sync::LazyLock;
use parse_display::Display;
use risingwave_common::types::DataTypeName;

#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct CastSig {
pub from_type: DataTypeName,
pub to_type: DataTypeName,
Expand Down
5 changes: 4 additions & 1 deletion src/tests/sqlsmith/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pub fn create_table_statement_to_table(statement: &Statement) -> Table {
name: name.0[0].real_value(),
columns: columns.iter().map(|c| c.clone().into()).collect(),
},
_ => panic!("Unexpected statement: {}", statement),
_ => panic!(
"Only CREATE TABLE statements permitted, received: {}",
statement
),
}
}
4 changes: 2 additions & 2 deletions src/tests/sqlsmith/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ async fn test_stream_queries<R: Rng>(
}

fn get_seed_table_sql(testdata: &str) -> String {
let seed_files = vec!["tpch.sql", "nexmark.sql"];
let seed_files = vec!["tpch.sql", "nexmark.sql", "alltypes.sql"];
seed_files
.iter()
.map(|filename| std::fs::read_to_string(format!("{}/{}", testdata, filename)).unwrap())
Expand Down Expand Up @@ -172,7 +172,7 @@ async fn drop_tables(mviews: &[Table], testdata: &str, client: &tokio_postgres::
drop_mview_table(mview, client).await;
}

let seed_files = vec!["drop_tpch.sql", "drop_nexmark.sql"];
let seed_files = vec!["drop_tpch.sql", "drop_nexmark.sql", "drop_alltypes.sql"];
let sql = seed_files
.iter()
.map(|filename| std::fs::read_to_string(format!("{}/{}", testdata, filename)).unwrap())
Expand Down
96 changes: 33 additions & 63 deletions src/tests/sqlsmith/src/sql_gen/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,54 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::LazyLock;

use itertools::Itertools;
use rand::seq::SliceRandom;
use rand::Rng;
use risingwave_common::types::DataTypeName;
use risingwave_common::types::DataType;
use risingwave_expr::expr::AggKind;
use risingwave_frontend::expr::{
agg_func_sigs, cast_sigs, func_sigs, AggFuncSig, CastContext, CastSig, ExprType, FuncSign,
};
use risingwave_frontend::expr::{agg_func_sigs, cast_sigs, func_sigs, CastContext, ExprType};
use risingwave_sqlparser::ast::{
BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName,
TrimWhereField, UnaryOperator, Value,
};

use crate::sql_gen::utils::data_type_name_to_ast_data_type;
use crate::sql_gen::types::{data_type_to_ast_data_type, AGG_FUNC_TABLE, CAST_TABLE, FUNC_TABLE};
use crate::sql_gen::{SqlGenerator, SqlGeneratorContext};

static FUNC_TABLE: LazyLock<HashMap<DataTypeName, Vec<FuncSign>>> = LazyLock::new(|| {
let mut funcs = HashMap::<DataTypeName, Vec<FuncSign>>::new();
func_sigs().for_each(|func| funcs.entry(func.ret_type).or_default().push(func.clone()));
funcs
});

static AGG_FUNC_TABLE: LazyLock<HashMap<DataTypeName, Vec<AggFuncSig>>> = LazyLock::new(|| {
let mut funcs = HashMap::<DataTypeName, Vec<AggFuncSig>>::new();
agg_func_sigs().for_each(|func| funcs.entry(func.ret_type).or_default().push(func.clone()));
funcs
});

/// Build a cast map from return types to viable cast-signatures.
/// NOTE: We avoid cast from varchar to other datatypes apart from itself.
/// This is because arbitrary strings may not be able to cast,
/// creating large number of invalid queries.
static CAST_TABLE: LazyLock<HashMap<DataTypeName, Vec<CastSig>>> = LazyLock::new(|| {
let mut casts = HashMap::<DataTypeName, Vec<CastSig>>::new();
cast_sigs()
.filter(|cast| {
cast.context == CastContext::Explicit || cast.context == CastContext::Implicit
})
.filter(|cast| {
cast.from_type != DataTypeName::Varchar || cast.to_type == DataTypeName::Varchar
})
.for_each(|cast| casts.entry(cast.to_type).or_default().push(cast));
casts
});

impl<'a, R: Rng> SqlGenerator<'a, R> {
/// In generating expression, there are two execution modes:
/// 1) Can have Aggregate expressions (`can_agg` = true)
Expand All @@ -69,7 +35,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
/// Only columns present in GROUP BY can be selected.
///
/// `inside_agg` indicates if we are calling `gen_expr` inside an aggregate.
pub(crate) fn gen_expr(&mut self, typ: DataTypeName, context: SqlGeneratorContext) -> Expr {
pub(crate) fn gen_expr(&mut self, typ: &DataType, context: SqlGeneratorContext) -> Expr {
if !self.can_recurse() {
// Stop recursion with a simple scalar or column.
return match self.rng.gen_bool(0.5) {
Expand All @@ -91,7 +57,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
}

fn gen_col(&mut self, typ: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_col(&mut self, typ: &DataType, context: SqlGeneratorContext) -> Expr {
let columns = if context.is_inside_agg() {
if self.bound_relations.is_empty() {
return self.gen_simple_scalar(typ);
Expand All @@ -109,7 +75,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {

let matched_cols = columns
.iter()
.filter(|col| col.data_type == typ)
.filter(|col| col.data_type == *typ)
.collect::<Vec<_>>();
if matched_cols.is_empty() {
self.gen_simple_scalar(typ)
Expand All @@ -119,24 +85,24 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
}

fn gen_cast(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_cast(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
self.gen_cast_inner(ret, context)
.unwrap_or_else(|| self.gen_simple_scalar(ret))
}

/// Generate casts from a cast map.
/// TODO: Assign casts have to be tested via `INSERT`.
fn gen_cast_inner(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Option<Expr> {
let casts = CAST_TABLE.get(&ret)?;
fn gen_cast_inner(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Option<Expr> {
let casts = CAST_TABLE.get(ret)?;
let cast_sig = casts.choose(&mut self.rng).unwrap();

use CastContext as T;
match cast_sig.context {
T::Explicit => {
let expr = self
.gen_expr(cast_sig.from_type, context.set_inside_explicit_cast())
.gen_expr(&cast_sig.from_type, context.set_inside_explicit_cast())
.into();
let data_type = data_type_name_to_ast_data_type(cast_sig.to_type)?;
let data_type = data_type_to_ast_data_type(&cast_sig.to_type);
Some(Expr::Cast { expr, data_type })
}

Expand Down Expand Up @@ -166,7 +132,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
}

fn gen_func(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_func(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
match self.rng.gen_bool(0.1) {
true => self.gen_variadic_func(ret, context),
false => self.gen_fixed_func(ret, context),
Expand All @@ -175,8 +141,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {

/// Generates functions with variable arity:
/// `CASE`, `COALESCE`, `CONCAT`, `CONCAT_WS`
fn gen_variadic_func(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
use DataTypeName as T;
fn gen_variadic_func(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
use DataType as T;
match ret {
T::Varchar => match self.rng.gen_range(0..=3) {
0 => self.gen_case(ret, context),
Expand All @@ -189,20 +155,22 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
true => self.gen_case(ret, context),
false => self.gen_coalesce(ret, context),
},
// TODO: gen_regexpr
// TODO: gen functions which return list, struct
}
}

fn gen_case(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_case(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
let n = self.rng.gen_range(1..10);
Expr::Case {
operand: None,
conditions: self.gen_n_exprs_with_type(n, DataTypeName::Boolean, context),
conditions: self.gen_n_exprs_with_type(n, &DataType::Boolean, context),
results: self.gen_n_exprs_with_type(n, ret, context),
else_result: Some(Box::new(self.gen_expr(ret, context))),
}
}

fn gen_coalesce(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_coalesce(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
let non_null = self.gen_expr(ret, context);
let position = self.rng.gen_range(0..10);
let mut args = (0..10).map(|_| Expr::Value(Value::Null)).collect_vec();
Expand All @@ -215,37 +183,37 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}

fn gen_concat_ws(&mut self, context: SqlGeneratorContext) -> Expr {
let sep = self.gen_expr(DataTypeName::Varchar, context);
let sep = self.gen_expr(&DataType::Varchar, context);
let mut args = self.gen_concat_args(context);
args.insert(0, sep);
Expr::Function(make_simple_func("concat_ws", &args))
}

fn gen_concat_args(&mut self, context: SqlGeneratorContext) -> Vec<Expr> {
let n = self.rng.gen_range(1..10);
self.gen_n_exprs_with_type(n, DataTypeName::Varchar, context)
self.gen_n_exprs_with_type(n, &DataType::Varchar, context)
}

/// Generates `n` expressions of type `ret`.
fn gen_n_exprs_with_type(
&mut self,
n: usize,
ret: DataTypeName,
ret: &DataType,
context: SqlGeneratorContext,
) -> Vec<Expr> {
(0..n).map(|_| self.gen_expr(ret, context)).collect()
}

fn gen_fixed_func(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
let funcs = match FUNC_TABLE.get(&ret) {
fn gen_fixed_func(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
let funcs = match FUNC_TABLE.get(ret) {
None => return self.gen_simple_scalar(ret),
Some(funcs) => funcs,
};
let func = funcs.choose(&mut self.rng).unwrap();
let exprs: Vec<Expr> = func
.inputs_type
.iter()
.map(|t| self.gen_expr(*t, context))
.map(|t| self.gen_expr(t, context))
.collect();
let expr = if exprs.len() == 1 {
make_unary_op(func.func, &exprs[0])
Expand All @@ -258,13 +226,13 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
.unwrap_or_else(|| self.gen_simple_scalar(ret))
}

fn gen_exists(&mut self, ret: DataTypeName, context: SqlGeneratorContext) -> Expr {
fn gen_exists(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr {
// TODO: Streaming nested loop join is not implemented yet.
// Tracked by: <https://github.com/singularity-data/risingwave/issues/2655>.

// Generation of subquery inside aggregation is now workaround.
// Tracked by: <https://github.com/risingwavelabs/risingwave/issues/3896>.
if self.is_mview || ret != DataTypeName::Boolean || context.can_gen_agg() {
if self.is_mview || *ret != DataType::Boolean || context.can_gen_agg() {
return self.gen_simple_scalar(ret);
};
// TODO: Feature is not yet implemented: correlated subquery in HAVING or SELECT with agg
Expand All @@ -274,12 +242,12 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
Expr::Exists(Box::new(subquery))
}

fn gen_agg(&mut self, ret: DataTypeName) -> Expr {
fn gen_agg(&mut self, ret: &DataType) -> Expr {
// TODO: workaround for <https://github.com/risingwavelabs/risingwave/issues/4508>
if ret == DataTypeName::Interval {
if *ret == DataType::Interval {
return self.gen_simple_scalar(ret);
}
let funcs = match AGG_FUNC_TABLE.get(&ret) {
let funcs = match AGG_FUNC_TABLE.get(ret) {
None => return self.gen_simple_scalar(ret),
Some(funcs) => funcs,
};
Expand All @@ -290,7 +258,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
let exprs: Vec<Expr> = func
.inputs_type
.iter()
.map(|t| self.gen_expr(*t, context))
.map(|t| self.gen_expr(t, context))
.collect();

let distinct = self.flip_coin() && self.is_distinct_allowed;
Expand Down Expand Up @@ -484,6 +452,8 @@ fn make_bin_op(func: ExprType, exprs: &[Expr]) -> Option<Expr> {
})
}

/// Generates a `NULL` value.
/// TODO(Noel): Generate null for other scalar values.
pub(crate) fn sql_null() -> Expr {
Expr::Value(Value::Null)
}
Expand Down
13 changes: 7 additions & 6 deletions src/tests/sqlsmith/src/sql_gen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
//! Provides Data structures for query generation,
//! and the interface for generating
//! stream (MATERIALIZED VIEW) and batch query statements.

use std::vec;

use rand::Rng;
use risingwave_common::types::DataTypeName;
use risingwave_common::types::DataType;
use risingwave_frontend::bind_data_type;
use risingwave_sqlparser::ast::{ColumnDef, Expr, Ident, ObjectName, Statement};

Expand All @@ -29,6 +30,7 @@ mod query;
mod relation;
mod scalar;
mod time_window;
mod types;
mod utils;

#[derive(Clone, Debug)]
Expand All @@ -47,25 +49,24 @@ impl Table {
.iter()
.map(|c| Column {
name: format!("{}.{}", self.name, c.name),
data_type: c.data_type,
data_type: c.data_type.clone(),
})
.collect()
}
}

/// Sqlsmith Column definition
#[derive(Clone, Debug)]
pub struct Column {
name: String,
data_type: DataTypeName,
data_type: DataType,
}

impl From<ColumnDef> for Column {
fn from(c: ColumnDef) -> Self {
Self {
name: c.name.real_value(),
data_type: bind_data_type(&c.data_type.expect("data type should not be none"))
.unwrap()
.into(),
data_type: bind_data_type(&c.data_type.expect("data type should not be none")).unwrap(),
}
}
}
Expand Down

0 comments on commit 98ae6ce

Please sign in to comment.