From dd5683745e7d527b01b804c8f4f1a0a53aa225e8 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Mon, 29 Apr 2024 18:14:06 -0500 Subject: [PATCH] Minor: return NULL for range and generate_series (#10275) * return NULL for range and generate_series * Update datafusion/sqllogictest/test_files/array.slt Co-authored-by: Andrew Lamb * Update datafusion/sqllogictest/test_files/array.slt Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb --- datafusion/functions-array/src/range.rs | 45 ++++-- datafusion/functions-array/src/udf.rs | 140 ------------------- datafusion/sqllogictest/test_files/array.slt | 37 +++-- 3 files changed, 59 insertions(+), 163 deletions(-) diff --git a/datafusion/functions-array/src/range.rs b/datafusion/functions-array/src/range.rs index 1c9e0c878e6e..150fe5960266 100644 --- a/datafusion/functions-array/src/range.rs +++ b/datafusion/functions-array/src/range.rs @@ -17,14 +17,12 @@ //! [`ScalarUDFImpl`] definitions for range and gen_series functions. +use crate::utils::make_scalar_function; use arrow::array::{Array, ArrayRef, Int64Array, ListArray}; use arrow::datatypes::{DataType, Field}; -use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; -use std::any::Any; - -use crate::utils::make_scalar_function; use arrow_array::types::{Date32Type, IntervalMonthDayNanoType}; -use arrow_array::Date32Array; +use arrow_array::{Date32Array, NullArray}; +use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::DataType::{Date32, Int64, Interval, List}; use arrow_schema::IntervalUnit::MonthDayNano; use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array}; @@ -34,6 +32,7 @@ use datafusion_expr::Expr; use datafusion_expr::{ ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use std::any::Any; use std::sync::Arc; make_udf_function!( @@ -57,6 +56,7 @@ impl Range { TypeSignature::Exact(vec![Int64, Int64]), TypeSignature::Exact(vec![Int64, Int64, Int64]), TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), + TypeSignature::Any(3), ], Volatility::Immutable, ), @@ -77,14 +77,21 @@ impl ScalarUDFImpl for Range { } fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(List(Arc::new(Field::new( - "item", - arg_types[0].clone(), - true, - )))) + if arg_types.iter().any(|t| t.eq(&DataType::Null)) { + Ok(DataType::Null) + } else { + Ok(List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) + } } fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.iter().any(|arg| arg.data_type() == DataType::Null) { + return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); + } match args[0].data_type() { Int64 => make_scalar_function(|args| gen_range_inner(args, false))(args), Date32 => make_scalar_function(|args| gen_range_date(args, false))(args), @@ -120,6 +127,7 @@ impl GenSeries { TypeSignature::Exact(vec![Int64, Int64]), TypeSignature::Exact(vec![Int64, Int64, Int64]), TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), + TypeSignature::Any(3), ], Volatility::Immutable, ), @@ -140,14 +148,21 @@ impl ScalarUDFImpl for GenSeries { } fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(List(Arc::new(Field::new( - "item", - arg_types[0].clone(), - true, - )))) + if arg_types.iter().any(|t| t.eq(&DataType::Null)) { + Ok(DataType::Null) + } else { + Ok(List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) + } } fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.iter().any(|arg| arg.data_type() == DataType::Null) { + return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); + } match args[0].data_type() { Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args), Date32 => make_scalar_function(|args| gen_range_date(args, true))(args), diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 1462b3efad33..c723fbb42cfc 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -166,146 +166,6 @@ impl ScalarUDFImpl for StringToArray { } } -make_udf_function!( - Range, - range, - start stop step, - "create a list of values in the range between start and stop", - range_udf -); -#[derive(Debug)] -pub struct Range { - signature: Signature, - aliases: Vec, -} -impl Range { - pub fn new() -> Self { - use DataType::*; - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Exact(vec![Int64]), - TypeSignature::Exact(vec![Int64, Int64]), - TypeSignature::Exact(vec![Int64, Int64, Int64]), - TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), - ], - Volatility::Immutable, - ), - aliases: vec![String::from("range")], - } - } -} -impl ScalarUDFImpl for Range { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { - "range" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - Ok(List(Arc::new(Field::new( - "item", - arg_types[0].clone(), - true, - )))) - } - - fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - match args[0].data_type() { - arrow::datatypes::DataType::Int64 => { - crate::kernels::gen_range(&args, false).map(ColumnarValue::Array) - } - arrow::datatypes::DataType::Date32 => { - crate::kernels::gen_range_date(&args, false).map(ColumnarValue::Array) - } - _ => { - exec_err!("unsupported type for range") - } - } - } - - fn aliases(&self) -> &[String] { - &self.aliases - } -} - -make_udf_function!( - GenSeries, - gen_series, - start stop step, - "create a list of values in the range between start and stop, include upper bound", - gen_series_udf -); -#[derive(Debug)] -pub struct GenSeries { - signature: Signature, - aliases: Vec, -} -impl GenSeries { - pub fn new() -> Self { - use DataType::*; - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Exact(vec![Int64]), - TypeSignature::Exact(vec![Int64, Int64]), - TypeSignature::Exact(vec![Int64, Int64, Int64]), - TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), - ], - Volatility::Immutable, - ), - aliases: vec![String::from("generate_series")], - } - } -} -impl ScalarUDFImpl for GenSeries { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { - "generate_series" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - Ok(List(Arc::new(Field::new( - "item", - arg_types[0].clone(), - true, - )))) - } - - fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - match args[0].data_type() { - arrow::datatypes::DataType::Int64 => { - crate::kernels::gen_range(&args, true).map(ColumnarValue::Array) - } - arrow::datatypes::DataType::Date32 => { - crate::kernels::gen_range_date(&args, true).map(ColumnarValue::Array) - } - _ => { - exec_err!("unsupported type for range") - } - } - } - - fn aliases(&self) -> &[String] { - &self.aliases - } -} - make_udf_function!( ArrayDims, array_dims, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b33419ecd47c..3b90187f07e0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5634,15 +5634,26 @@ select range(NULL) ---- NULL -## should throw error -query error +## should return NULL +query ? select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL -query error +query ? select range(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL -query error +query ? select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select range(NULL, NULL, NULL); +---- +NULL query ? select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) @@ -5668,16 +5679,26 @@ select generate_series(5), ---- [0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] -## should throw error -query error +## should return NULL +query ? select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL -query error +query ? select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL -query error +query ? select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL +query ? +select generate_series(NULL, NULL, NULL); +---- +NULL query ? select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)