Skip to content

Commit

Permalink
Minor: return NULL for range and generate_series (apache#10275)
Browse files Browse the repository at this point in the history
* return NULL for range and generate_series

* Update datafusion/sqllogictest/test_files/array.slt

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* Update datafusion/sqllogictest/test_files/array.slt

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
Lordworms and alamb committed Apr 29, 2024
1 parent 0f2a68e commit dd56837
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 163 deletions.
45 changes: 30 additions & 15 deletions datafusion/functions-array/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@

//! [`ScalarUDFImpl`] definitions for range and gen_series functions.

use crate::utils::make_scalar_function;
use arrow::array::{Array, ArrayRef, Int64Array, ListArray};
use arrow::datatypes::{DataType, Field};
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use std::any::Any;

use crate::utils::make_scalar_function;
use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
use arrow_array::Date32Array;
use arrow_array::{Date32Array, NullArray};
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_schema::DataType::{Date32, Int64, Interval, List};
use arrow_schema::IntervalUnit::MonthDayNano;
use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array};
Expand All @@ -34,6 +32,7 @@ use datafusion_expr::Expr;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility,
};
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
Expand All @@ -57,6 +56,7 @@ impl Range {
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
TypeSignature::Any(3),
],
Volatility::Immutable,
),
Expand All @@ -77,14 +77,21 @@ impl ScalarUDFImpl for Range {
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
Ok(DataType::Null)
} else {
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
}
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.iter().any(|arg| arg.data_type() == DataType::Null) {
return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
}
match args[0].data_type() {
Int64 => make_scalar_function(|args| gen_range_inner(args, false))(args),
Date32 => make_scalar_function(|args| gen_range_date(args, false))(args),
Expand Down Expand Up @@ -120,6 +127,7 @@ impl GenSeries {
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
TypeSignature::Any(3),
],
Volatility::Immutable,
),
Expand All @@ -140,14 +148,21 @@ impl ScalarUDFImpl for GenSeries {
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
Ok(DataType::Null)
} else {
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
}
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.iter().any(|arg| arg.data_type() == DataType::Null) {
return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
}
match args[0].data_type() {
Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args),
Date32 => make_scalar_function(|args| gen_range_date(args, true))(args),
Expand Down
140 changes: 0 additions & 140 deletions datafusion/functions-array/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,146 +166,6 @@ impl ScalarUDFImpl for StringToArray {
}
}

// Declares the `Range` UDF via the crate's `make_udf_function!` macro.
// Arguments, in order: the implementing type (`Range`), the expression
// function name (`range`), its argument names (`start stop step`), the
// description string, and the identifier `range_udf`.
// NOTE(review): the macro body is not visible here; presumably it generates
// a `range(...)` expression helper and a `range_udf()` accessor — confirm.
make_udf_function!(
Range,
range,
start stop step,
"create a list of values in the range between start and stop",
range_udf
);
/// Scalar UDF implementation whose `name()` is `"range"`.
#[derive(Debug)]
pub struct Range {
/// Accepted argument type combinations; built in `Range::new`.
signature: Signature,
/// Names returned by `aliases()`; initialised to `["range"]` in `Range::new`.
aliases: Vec<String>,
}
impl Range {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![Int64]),
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
],
Volatility::Immutable,
),
aliases: vec![String::from("range")],
}
}
}
impl ScalarUDFImpl for Range {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
"range"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
match args[0].data_type() {
arrow::datatypes::DataType::Int64 => {
crate::kernels::gen_range(&args, false).map(ColumnarValue::Array)
}
arrow::datatypes::DataType::Date32 => {
crate::kernels::gen_range_date(&args, false).map(ColumnarValue::Array)
}
_ => {
exec_err!("unsupported type for range")
}
}
}

fn aliases(&self) -> &[String] {
&self.aliases
}
}

// Declares the `GenSeries` UDF via the crate's `make_udf_function!` macro.
// Arguments, in order: the implementing type (`GenSeries`), the expression
// function name (`gen_series`), its argument names (`start stop step`), the
// description string, and the identifier `gen_series_udf`.
// NOTE(review): the macro body is not visible here; presumably it generates
// a `gen_series(...)` expression helper and a `gen_series_udf()` accessor —
// confirm.
make_udf_function!(
GenSeries,
gen_series,
start stop step,
"create a list of values in the range between start and stop, include upper bound",
gen_series_udf
);
/// Scalar UDF implementation whose `name()` is `"generate_series"`.
#[derive(Debug)]
pub struct GenSeries {
/// Accepted argument type combinations; built in `GenSeries::new`.
signature: Signature,
/// Names returned by `aliases()`; initialised to `["generate_series"]` in
/// `GenSeries::new`.
aliases: Vec<String>,
}
impl GenSeries {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![Int64]),
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
],
Volatility::Immutable,
),
aliases: vec![String::from("generate_series")],
}
}
}
impl ScalarUDFImpl for GenSeries {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
"generate_series"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
true,
))))
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
match args[0].data_type() {
arrow::datatypes::DataType::Int64 => {
crate::kernels::gen_range(&args, true).map(ColumnarValue::Array)
}
arrow::datatypes::DataType::Date32 => {
crate::kernels::gen_range_date(&args, true).map(ColumnarValue::Array)
}
_ => {
exec_err!("unsupported type for range")
}
}
}

fn aliases(&self) -> &[String] {
&self.aliases
}
}

make_udf_function!(
ArrayDims,
array_dims,
Expand Down
37 changes: 29 additions & 8 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5634,15 +5634,26 @@ select range(NULL)
----
NULL

## should throw error
query error
## should return NULL
query ?
select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
----
NULL

query error
query ?
select range(DATE '1992-09-01', DATE '1993-03-01', NULL);
----
NULL

query error
query ?
select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
----
NULL

query ?
select range(NULL, NULL, NULL);
----
NULL

query ?
select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)
Expand All @@ -5668,16 +5679,26 @@ select generate_series(5),
----
[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01]

## should throw error
query error
## should return NULL
query ?
select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
----
NULL

query error
query ?
select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL);
----
NULL

query error
query ?
select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
----
NULL

query ?
select generate_series(NULL, NULL, NULL);
----
NULL

query ?
select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)
Expand Down

0 comments on commit dd56837

Please sign in to comment.