Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support date_part function and more fields for extract #8830

Merged
merged 23 commits into from Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
80 changes: 40 additions & 40 deletions e2e_test/batch/duckdb/join/iejoin/test_iejoin.test.slt.part
Expand Up @@ -140,46 +140,46 @@ WITH data_table AS (
GROUP BY bucket, low, high
ORDER BY bucket;
----
0 1577836800 1578627360 10
1 1578627360 1579417920 9
2 1579417920 1580208480 9
3 1580208480 1580999040 9
4 1580999040 1581789600 9
5 1581789600 1582580160 9
6 1582580160 1583370720 10
7 1583370720 1584161280 9
8 1584161280 1584951840 9
9 1584951840 1585742400 9
10 1585742400 1586532960 9
11 1586532960 1587323520 9
12 1587323520 1588114080 9
13 1588114080 1588904640 10
14 1588904640 1589695200 9
15 1589695200 1590485760 9
16 1590485760 1591276320 9
17 1591276320 1592066880 9
18 1592066880 1592857440 9
19 1592857440 1593648000 9
20 1593648000 1594438560 10
21 1594438560 1595229120 9
22 1595229120 1596019680 9
23 1596019680 1596810240 9
24 1596810240 1597600800 9
25 1597600800 1598391360 9
26 1598391360 1599181920 10
27 1599181920 1599972480 9
28 1599972480 1600763040 9
29 1600763040 1601553600 9
30 1601553600 1602344160 9
31 1602344160 1603134720 9
32 1603134720 1603925280 9
33 1603925280 1604715840 10
34 1604715840 1605506400 9
35 1605506400 1606296960 9
36 1606296960 1607087520 9
37 1607087520 1607878080 9
38 1607878080 1608668640 9
39 1608668640 1609459200 9
0 1577836800.000000 1578627360.000000 10
1 1578627360.000000 1579417920.000000 9
2 1579417920.000000 1580208480.000000 9
3 1580208480.000000 1580999040.000000 9
4 1580999040.000000 1581789600.000000 9
5 1581789600.000000 1582580160.000000 9
6 1582580160.000000 1583370720.000000 10
7 1583370720.000000 1584161280.000000 9
8 1584161280.000000 1584951840.000000 9
9 1584951840.000000 1585742400.000000 9
10 1585742400.000000 1586532960.000000 9
11 1586532960.000000 1587323520.000000 9
12 1587323520.000000 1588114080.000000 9
13 1588114080.000000 1588904640.000000 10
14 1588904640.000000 1589695200.000000 9
15 1589695200.000000 1590485760.000000 9
16 1590485760.000000 1591276320.000000 9
17 1591276320.000000 1592066880.000000 9
18 1592066880.000000 1592857440.000000 9
19 1592857440.000000 1593648000.000000 9
20 1593648000.000000 1594438560.000000 10
21 1594438560.000000 1595229120.000000 9
22 1595229120.000000 1596019680.000000 9
23 1596019680.000000 1596810240.000000 9
24 1596810240.000000 1597600800.000000 9
25 1597600800.000000 1598391360.000000 9
26 1598391360.000000 1599181920.000000 10
27 1599181920.000000 1599972480.000000 9
28 1599972480.000000 1600763040.000000 9
29 1600763040.000000 1601553600.000000 9
30 1601553600.000000 1602344160.000000 9
31 1602344160.000000 1603134720.000000 9
32 1603134720.000000 1603925280.000000 9
33 1603925280.000000 1604715840.000000 10
34 1604715840.000000 1605506400.000000 9
35 1605506400.000000 1606296960.000000 9
36 1606296960.000000 1607087520.000000 9
37 1607087520.000000 1607878080.000000 9
38 1607878080.000000 1608668640.000000 9
39 1608668640.000000 1609459200.000000 9


statement ok
Expand Down
1 change: 1 addition & 0 deletions proto/expr.proto
Expand Up @@ -45,6 +45,7 @@ message ExprNode {
BITWISE_SHIFT_RIGHT = 36;
// date functions
EXTRACT = 101;
DATE_PART = 102;
TUMBLE_START = 103;
// From f64 to timestamp.
// e.g. `select to_timestamp(1672044740.0)`
Expand Down
6 changes: 3 additions & 3 deletions src/common/src/array/arrow.rs
Expand Up @@ -353,10 +353,10 @@ impl FromIntoArrow for Interval {

fn into_arrow(self) -> Self::ArrowType {
arrow_array::types::IntervalMonthDayNanoType::make_value(
self.get_months(),
self.get_days(),
self.months(),
self.days(),
// TODO: this may overflow and we need `try_into`
self.get_usecs() * 1000,
self.usecs() * 1000,
)
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/common/src/array/interval_array.rs
Expand Up @@ -35,17 +35,17 @@ mod tests {
}
let ret_arr = array_builder.finish();
for v in ret_arr.iter().flatten() {
assert_eq!(v.get_months(), 12);
assert_eq!(v.get_days(), 0);
assert_eq!(v.months(), 12);
assert_eq!(v.days(), 0);
}
let ret_arr = IntervalArray::from_iter([Some(Interval::from_ymd(1, 0, 0)), None]);
let v = ret_arr.value_at(0).unwrap();
assert_eq!(v.get_months(), 12);
assert_eq!(v.get_days(), 0);
assert_eq!(v.months(), 12);
assert_eq!(v.days(), 0);
let v = ret_arr.value_at(1);
assert_eq!(v, None);
let v = unsafe { ret_arr.value_at_unchecked(0).unwrap() };
assert_eq!(v.get_months(), 12);
assert_eq!(v.get_days(), 0);
assert_eq!(v.months(), 12);
assert_eq!(v.days(), 0);
}
}
8 changes: 4 additions & 4 deletions src/common/src/types/chrono_wrapper.rs
Expand Up @@ -525,13 +525,13 @@ impl CheckedAdd<Interval> for Timestamp {

fn checked_add(self, rhs: Interval) -> Option<Timestamp> {
let mut date = self.0.date();
if rhs.get_months() != 0 {
if rhs.months() != 0 {
// `NaiveDate` doesn't support adding months, so we need to calculate it manually.
let mut day = date.day() as i32;
let mut month = date.month() as i32;
let mut year = date.year();
// Calculate the number of year in this interval
let interval_months = rhs.get_months();
let interval_months = rhs.months();
let year_diff = interval_months / 12;
year += year_diff;

Expand All @@ -556,8 +556,8 @@ impl CheckedAdd<Interval> for Timestamp {
date = NaiveDate::from_ymd_opt(year, month as u32, day as u32)?;
}
let mut datetime = NaiveDateTime::new(date, self.0.time());
datetime = datetime.checked_add_signed(Duration::days(rhs.get_days().into()))?;
datetime = datetime.checked_add_signed(Duration::microseconds(rhs.get_usecs()))?;
datetime = datetime.checked_add_signed(Duration::days(rhs.days().into()))?;
datetime = datetime.checked_add_signed(Duration::microseconds(rhs.usecs()))?;

Some(Timestamp::new(datetime))
}
Expand Down
99 changes: 87 additions & 12 deletions src/common/src/types/interval.rs
Expand Up @@ -57,6 +57,7 @@ impl Interval {
usecs: i64::MIN,
};

/// Creates a new `Interval` from the given number of months, days, and microseconds.
pub fn from_month_day_usec(months: i32, days: i32, usecs: i64) -> Self {
Interval {
months,
Expand All @@ -65,22 +66,96 @@ impl Interval {
}
}

pub fn get_days(&self) -> i32 {
self.days
/// Returns the total number of whole months.
///
/// Note the difference between [`months`] and [`months_field`].
/// ```
/// # use risingwave_common::types::Interval;
/// let interval: Interval = "5 yrs 1 month".parse().unwrap();
/// assert_eq!(interval.months(), 61);
/// assert_eq!(interval.months_field(), 1);
/// ```
pub fn months(&self) -> i32 {
self.months
}

pub fn get_months(&self) -> i32 {
self.months
/// Returns the number of days.
pub fn days(&self) -> i32 {
self.days
}

pub fn get_usecs(&self) -> i64 {
/// Returns the number of microseconds stored in the interval (not reduced modulo a day).
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
pub fn usecs(&self) -> i64 {
self.usecs
}

pub fn get_usecs_of_day(&self) -> u64 {
/// Calculates the remaining number of microseconds in a day.
///
/// Note the difference between [`usecs`] and [`usecs_of_day`].
/// ```
/// # use risingwave_common::types::Interval;
/// let interval: Interval = "-1:00:00".parse().unwrap();
/// assert_eq!(interval.usecs(), -1 * 60 * 60 * 1_000_000);
/// assert_eq!(interval.usecs_of_day(), 23 * 60 * 60 * 1_000_000);
/// ```
pub fn usecs_of_day(&self) -> u64 {
self.usecs.rem_euclid(USECS_PER_DAY) as u64
}

/// Returns the years field: the number of whole years contained in the
/// months component (12 months per year), truncated toward zero.
pub fn years_field(&self) -> i32 {
    let total_months = self.months;
    total_months / 12
}

/// Returns the months field. range: `-11..=11` (the sign follows the sign of the total months)
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
pub fn months_field(&self) -> i32 {
self.months % 12
}

/// Returns the days field.
pub fn days_field(&self) -> i32 {
self.days
}

/// Returns the hours field. range: -23..=23
pub fn hours_field(&self) -> i32 {
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
(self.usecs / USECS_PER_SEC / 3600 % 24) as i32
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In PostgreSQL:

test=# select extract(hour from interval '100 hour');
 extract 
---------
     100
(1 row)

Basically, in Interval, the fields are (months, days, usecs), because days per month and hours per day can vary.

In pg_itm or the _field series, the separation is still preserved:

  • months -> unlimited years_field and bounded months_field
  • days -> days no change
  • usecs -> unlimited hours_field, bounded minutes_field, and bounded fractional seconds seconds_in_micros. I guess we agree the fractional seconds part should not be named as _field.


/// Returns the minutes field. range: `-59..=59`
///
/// The microseconds component is reduced to whole seconds, then to whole
/// minutes, and finally taken modulo 60; every step truncates toward zero,
/// so the result carries the sign of the microseconds component.
pub fn minutes_field(&self) -> i32 {
    let whole_secs = self.usecs / USECS_PER_SEC;
    let whole_mins = whole_secs / 60;
    (whole_mins % 60) as i32
}

/// Returns the seconds field, fractional part included, expressed in
/// microseconds.
/// range: -59,999,999..=59,999,999
pub fn seconds_in_micros(&self) -> i32 {
    // Everything below one full minute of the microseconds component.
    let usecs_per_minute = USECS_PER_SEC * 60;
    let within_minute = self.usecs % usecs_per_minute;
    within_minute as i32
}

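/// Returns the total length of the interval in microseconds, counting a year as 365.25 days and a month as 30 days (the convention PostgreSQL uses for the `epoch` of an interval).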
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
pub fn epoch_in_micros(&self) -> i128 {
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/utils/adt/timestamp.c#L5304
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just sharing a weird behavior of PostgreSQL. Not suggesting any actions...
https://www.db-fiddle.com/f/5LLhMM7MY2DpiQwNzxYMkV/0
Unequal intervals may return the same epoch, and equal intervals may return different epochs.


const DAYS_PER_YEAR_X4: i32 = 365 * 4 + 1;
const DAYS_PER_MONTH: i32 = 30;
const SECS_PER_DAY: i32 = 86400;
const MONTHS_PER_YEAR: i32 = 12;

// To do this calculation in integer arithmetic even though
// DAYS_PER_YEAR is fractional, multiply everything by 4 and then
// divide by 4 again at the end. This relies on DAYS_PER_YEAR
// being a multiple of 0.25 and on SECS_PER_DAY being a multiple
// of 4.
let secs_from_day_month = ((DAYS_PER_YEAR_X4 as i64)
* (self.months / MONTHS_PER_YEAR) as i64
+ (4 * DAYS_PER_MONTH as i64) * (self.months % MONTHS_PER_YEAR) as i64
+ 4 * self.days as i64)
* (SECS_PER_DAY / 4) as i64;

secs_from_day_month as i128 * USECS_PER_SEC as i128 + self.usecs as i128
}

pub fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize> {
output.write_i32::<BigEndian>(self.months)?;
output.write_i32::<BigEndian>(self.days)?;
Expand Down Expand Up @@ -1483,9 +1558,9 @@ mod tests {
}
Some((rhs_months, rhs_days, rhs_usecs, rhs_str)) => {
// We should test individual fields rather than using custom `Eq`
assert_eq!(actual_deserialize.unwrap().get_months(), rhs_months);
assert_eq!(actual_deserialize.unwrap().get_days(), rhs_days);
assert_eq!(actual_deserialize.unwrap().get_usecs(), rhs_usecs);
assert_eq!(actual_deserialize.unwrap().months(), rhs_months);
assert_eq!(actual_deserialize.unwrap().days(), rhs_days);
assert_eq!(actual_deserialize.unwrap().usecs(), rhs_usecs);
assert_eq!(actual_deserialize.unwrap().to_string(), rhs_str);
}
}
Expand All @@ -1495,9 +1570,9 @@ mod tests {
let input = Interval::from_month_day_usec(i32::MIN, -30, 1);
let actual_deserialize = IntervalCmpValue::from(input).as_justified();
// It has a justified interval within range, and can be obtained by our deserialization.
assert_eq!(actual_deserialize.unwrap().get_months(), i32::MIN);
assert_eq!(actual_deserialize.unwrap().get_days(), -29);
assert_eq!(actual_deserialize.unwrap().get_usecs(), -USECS_PER_DAY + 1);
assert_eq!(actual_deserialize.unwrap().months(), i32::MIN);
assert_eq!(actual_deserialize.unwrap().days(), -29);
assert_eq!(actual_deserialize.unwrap().usecs(), -USECS_PER_DAY + 1);
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions src/common/src/util/value_encoding/mod.rs
Expand Up @@ -293,9 +293,9 @@ fn estimate_serialize_str_size(bytes: &[u8]) -> usize {
}

fn serialize_interval(interval: &Interval, buf: &mut impl BufMut) {
buf.put_i32_le(interval.get_months());
buf.put_i32_le(interval.get_days());
buf.put_i64_le(interval.get_usecs());
buf.put_i32_le(interval.months());
buf.put_i32_le(interval.days());
buf.put_i64_le(interval.usecs());
}

fn estimate_serialize_interval_size() -> usize {
Expand Down
8 changes: 4 additions & 4 deletions src/expr/src/vector_op/arithmetic_op.rs
Expand Up @@ -234,14 +234,14 @@ pub fn interval_timestamptz_add(l: Interval, r: i64) -> Result<i64> {
#[inline(always)]
fn timestamptz_interval_inner(l: i64, r: Interval, f: fn(i64, i64) -> Option<i64>) -> Result<i64> {
// Without session TimeZone, we cannot add month/day in local time. See #5826.
if r.get_months() != 0 || r.get_days() != 0 {
if r.months() != 0 || r.days() != 0 {
return Err(ExprError::UnsupportedFunction(
"timestamp with time zone +/- interval of days".into(),
));
}

let result: Option<i64> = try {
let delta_usecs = r.get_usecs();
let delta_usecs = r.usecs();
f(l, delta_usecs)?
};

Expand Down Expand Up @@ -280,7 +280,7 @@ pub fn time_time_sub(l: Time, r: Time) -> Result<Interval> {
#[function("subtract(time, interval) -> time")]
pub fn time_interval_sub(l: Time, r: Interval) -> Result<Time> {
let time = l.0;
let (new_time, ignored) = time.overflowing_sub_signed(Duration::microseconds(r.get_usecs()));
let (new_time, ignored) = time.overflowing_sub_signed(Duration::microseconds(r.usecs()));
if ignored == 0 {
Ok(Time::new(new_time))
} else {
Expand All @@ -291,7 +291,7 @@ pub fn time_interval_sub(l: Time, r: Interval) -> Result<Time> {
#[function("add(time, interval) -> time")]
pub fn time_interval_add(l: Time, r: Interval) -> Result<Time> {
let time = l.0;
let (new_time, ignored) = time.overflowing_add_signed(Duration::microseconds(r.get_usecs()));
let (new_time, ignored) = time.overflowing_add_signed(Duration::microseconds(r.usecs()));
if ignored == 0 {
Ok(Time::new(new_time))
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/expr/src/vector_op/cast.rs
Expand Up @@ -384,7 +384,7 @@ pub fn timestamp_to_time(elem: Timestamp) -> Time {
/// In `PostgreSQL`, casting from interval to time discards the days part.
#[function("cast(interval) -> time")]
pub fn interval_to_time(elem: Interval) -> Time {
let usecs = elem.get_usecs_of_day();
let usecs = elem.usecs_of_day();
let secs = (usecs / 1_000_000) as u32;
let nano = (usecs % 1_000_000 * 1000) as u32;
Time::from_num_seconds_from_midnight_uncheck(secs, nano)
Expand Down