Skip to content

Commit

Permalink
Add date_bin function
Browse files Browse the repository at this point in the history
Similar to date_trunc, but allows binning by an arbitrary interval
rather than just full units.

Author: John Naylor <john.naylor@enterprisedb.com>
Reviewed-by: David Fetter <david@fetter.org>
Reviewed-by: Isaac Morland <isaac.morland@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Artur Zakirov <zaartur@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CACPNZCt4buQFRgy6DyjuZS-2aPDpccRkrJBmgUfwYc1KiaXYxg@mail.gmail.com
  • Loading branch information
petere committed Mar 24, 2021
1 parent 1509c6f commit 49ab61f
Show file tree
Hide file tree
Showing 8 changed files with 402 additions and 1 deletion.
50 changes: 50 additions & 0 deletions doc/src/sgml/func.sgml
Expand Up @@ -8730,6 +8730,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
</para></entry>
</row>

<row>
<entry role="func_table_entry"><para role="func_signature">
<function>date_bin</function> ( <type>interval</type>, <type>timestamp</type>, <type>timestamp</type> )
<returnvalue>timestamp</returnvalue>
</para>
<para>
Bin input into specified interval aligned with specified origin; see <xref linkend="functions-datetime-bin"/>
</para>
<para>
<literal>date_bin('15 minutes', timestamp '2001-02-16 20:38:40', timestamp '2001-02-16 20:05:00')</literal>
<returnvalue>2001-02-16 20:35:00</returnvalue>
</para></entry>
</row>

<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
Expand Down Expand Up @@ -9868,6 +9882,42 @@ SELECT date_trunc('hour', INTERVAL '3 days 02:47:33');
</para>
</sect2>

<sect2 id="functions-datetime-bin">
<title><function>date_bin</function></title>

<indexterm>
<primary>date_bin</primary>
</indexterm>

<para>
The function <function>date_bin</function> <quote>bins</quote> the input
timestamp into the specified interval (the <firstterm>stride</firstterm>)
aligned with a specified origin.
</para>

<para>
Examples:
<screen>
SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01');
<lineannotation>Result: </lineannotation><computeroutput>2020-02-11 15:30:00</computeroutput>

SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01 00:02:30');
<lineannotation>Result: </lineannotation><computeroutput>2020-02-11 15:32:30</computeroutput>
</screen>
</para>

<para>
In cases full units (1 minute, 1 hour, etc.), it gives the same result as
the analogous <function>date_trunc</function> call, but the difference is
that <function>date_bin</function> can truncate to an arbitrary interval.
</para>

<para>
The <parameter>stride</parameter> interval cannot contain units of month
or larger.
</para>
</sect2>

<sect2 id="functions-datetime-zoneconvert">
<title><literal>AT TIME ZONE</literal></title>

Expand Down
74 changes: 74 additions & 0 deletions src/backend/utils/adt/timestamp.c
Expand Up @@ -3812,6 +3812,43 @@ timestamptz_age(PG_FUNCTION_ARGS)
*---------------------------------------------------------*/


/* timestamp_bin()
* Bin timestamp into specified interval.
*/
Datum
timestamp_bin(PG_FUNCTION_ARGS)
{
Interval *stride = PG_GETARG_INTERVAL_P(0);
Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
Timestamp origin = PG_GETARG_TIMESTAMP(2);
Timestamp result,
tm_diff,
stride_usecs,
tm_delta;

if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMP(timestamp);

if (TIMESTAMP_NOT_FINITE(origin))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("origin out of range")));

if (stride->month != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("timestamps cannot be binned into intervals containing months or years")));

stride_usecs = stride->day * USECS_PER_DAY + stride->time;

tm_diff = timestamp - origin;
tm_delta = tm_diff - tm_diff % stride_usecs;;

result = origin + tm_delta;

PG_RETURN_TIMESTAMP(result);
}

/* timestamp_trunc()
* Truncate timestamp to specified units.
*/
Expand Down Expand Up @@ -3946,6 +3983,43 @@ timestamp_trunc(PG_FUNCTION_ARGS)
PG_RETURN_TIMESTAMP(result);
}

/* timestamptz_bin()
* Bin timestamptz into specified interval using specified origin.
*/
Datum
timestamptz_bin(PG_FUNCTION_ARGS)
{
Interval *stride = PG_GETARG_INTERVAL_P(0);
TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
TimestampTz origin = PG_GETARG_TIMESTAMPTZ(2);
TimestampTz result,
stride_usecs,
tm_diff,
tm_delta;

if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMPTZ(timestamp);

if (TIMESTAMP_NOT_FINITE(origin))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("origin out of range")));

if (stride->month != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("timestamps cannot be binned into intervals containing months or years")));

stride_usecs = stride->day * USECS_PER_DAY + stride->time;

tm_diff = timestamp - origin;
tm_delta = tm_diff - tm_diff % stride_usecs;;

result = origin + tm_delta;

PG_RETURN_TIMESTAMPTZ(result);
}

/*
* Common code for timestamptz_trunc() and timestamptz_trunc_zone().
*
Expand Down
2 changes: 1 addition & 1 deletion src/include/catalog/catversion.h
Expand Up @@ -53,6 +53,6 @@
*/

/* yyyymmddN */
#define CATALOG_VERSION_NO 202103241
#define CATALOG_VERSION_NO 202103242

#endif
11 changes: 11 additions & 0 deletions src/include/catalog/pg_proc.dat
Expand Up @@ -5813,6 +5813,17 @@
{ oid => '2020', descr => 'truncate timestamp to specified units',
proname => 'date_trunc', prorettype => 'timestamp',
proargtypes => 'text timestamp', prosrc => 'timestamp_trunc' },

{ oid => '8990',
descr => 'bin timestamp into specified interval',
proname => 'date_bin', prorettype => 'timestamp',
proargtypes => 'interval timestamp timestamp',
prosrc => 'timestamp_bin' },
{ oid => '8993',
descr => 'bin timestamp with time zone into specified interval',
proname => 'date_bin', prorettype => 'timestamptz',
proargtypes => 'interval timestamptz timestamptz', prosrc => 'timestamptz_bin' },

{ oid => '2021', descr => 'extract field from timestamp',
proname => 'date_part', prorettype => 'float8',
proargtypes => 'text timestamp', prosrc => 'timestamp_part' },
Expand Down
96 changes: 96 additions & 0 deletions src/test/regress/expected/timestamp.out
Expand Up @@ -545,6 +545,102 @@ SELECT date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc
Mon Feb 23 00:00:00 2004
(1 row)

-- verify date_bin behaves the same as date_trunc for relevant intervals
-- case 1: AD dates, origin < input
SELECT
str,
interval,
date_trunc(str, ts) = date_bin(interval::interval, ts, timestamp '2001-01-01') AS equal
FROM (
VALUES
('week', '7 d'),
('day', '1 d'),
('hour', '1 h'),
('minute', '1 m'),
('second', '1 s'),
('millisecond', '1 ms'),
('microsecond', '1 us')
) intervals (str, interval),
(VALUES (timestamp '2020-02-29 15:44:17.71393')) ts (ts);
str | interval | equal
-------------+----------+-------
week | 7 d | t
day | 1 d | t
hour | 1 h | t
minute | 1 m | t
second | 1 s | t
millisecond | 1 ms | t
microsecond | 1 us | t
(7 rows)

-- case 2: BC dates, origin < input
SELECT
str,
interval,
date_trunc(str, ts) = date_bin(interval::interval, ts, timestamp '2000-01-01 BC') AS equal
FROM (
VALUES
('week', '7 d'),
('day', '1 d'),
('hour', '1 h'),
('minute', '1 m'),
('second', '1 s'),
('millisecond', '1 ms'),
('microsecond', '1 us')
) intervals (str, interval),
(VALUES (timestamp '0055-6-10 15:44:17.71393 BC')) ts (ts);
str | interval | equal
-------------+----------+-------
week | 7 d | t
day | 1 d | t
hour | 1 h | t
minute | 1 m | t
second | 1 s | t
millisecond | 1 ms | t
microsecond | 1 us | t
(7 rows)

-- bin timestamps into arbitrary intervals
SELECT
interval,
ts,
origin,
date_bin(interval::interval, ts, origin)
FROM (
VALUES
('15 days'),
('2 hours'),
('1 hour 30 minutes'),
('15 minutes'),
('10 seconds'),
('100 milliseconds'),
('250 microseconds')
) intervals (interval),
(VALUES (timestamp '2020-02-11 15:44:17.71393')) ts (ts),
(VALUES (timestamp '2001-01-01')) origin (origin);
interval | ts | origin | date_bin
-------------------+--------------------------------+--------------------------+--------------------------------
15 days | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Thu Feb 06 00:00:00 2020
2 hours | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 14:00:00 2020
1 hour 30 minutes | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 15:00:00 2020
15 minutes | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 15:30:00 2020
10 seconds | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 15:44:10 2020
100 milliseconds | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 15:44:17.7 2020
250 microseconds | Tue Feb 11 15:44:17.71393 2020 | Mon Jan 01 00:00:00 2001 | Tue Feb 11 15:44:17.71375 2020
(7 rows)

-- shift bins using the origin parameter:
SELECT date_bin('5 min'::interval, timestamp '2020-02-01 01:01:01', timestamp '2020-02-01 00:02:30');
date_bin
--------------------------
Sat Feb 01 00:57:30 2020
(1 row)

-- disallow intervals with months or years
SELECT date_bin('5 months'::interval, timestamp '2020-02-01 01:01:01', timestamp '2001-01-01');
ERROR: timestamps cannot be binned into intervals containing months or years
SELECT date_bin('5 years'::interval, timestamp '2020-02-01 01:01:01', timestamp '2001-01-01');
ERROR: timestamps cannot be binned into intervals containing months or years
-- Test casting within a BETWEEN qualifier
SELECT d1 - timestamp without time zone '1997-01-02' AS diff
FROM TIMESTAMP_TBL
Expand Down
66 changes: 66 additions & 0 deletions src/test/regress/expected/timestamptz.out
Expand Up @@ -663,6 +663,72 @@ SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'VET
Thu Feb 15 20:00:00 2001 PST
(1 row)

-- verify date_bin behaves the same as date_trunc for relevant intervals
SELECT
str,
interval,
date_trunc(str, ts, 'Australia/Sydney') = date_bin(interval::interval, ts, timestamp with time zone '2001-01-01+11') AS equal
FROM (
VALUES
('day', '1 d'),
('hour', '1 h'),
('minute', '1 m'),
('second', '1 s'),
('millisecond', '1 ms'),
('microsecond', '1 us')
) intervals (str, interval),
(VALUES (timestamptz '2020-02-29 15:44:17.71393+00')) ts (ts);
str | interval | equal
-------------+----------+-------
day | 1 d | t
hour | 1 h | t
minute | 1 m | t
second | 1 s | t
millisecond | 1 ms | t
microsecond | 1 us | t
(6 rows)

-- bin timestamps into arbitrary intervals
SELECT
interval,
ts,
origin,
date_bin(interval::interval, ts, origin)
FROM (
VALUES
('15 days'),
('2 hours'),
('1 hour 30 minutes'),
('15 minutes'),
('10 seconds'),
('100 milliseconds'),
('250 microseconds')
) intervals (interval),
(VALUES (timestamptz '2020-02-11 15:44:17.71393')) ts (ts),
(VALUES (timestamptz '2001-01-01')) origin (origin);
interval | ts | origin | date_bin
-------------------+------------------------------------+------------------------------+------------------------------------
15 days | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Thu Feb 06 00:00:00 2020 PST
2 hours | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 14:00:00 2020 PST
1 hour 30 minutes | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 15:00:00 2020 PST
15 minutes | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 15:30:00 2020 PST
10 seconds | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 15:44:10 2020 PST
100 milliseconds | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 15:44:17.7 2020 PST
250 microseconds | Tue Feb 11 15:44:17.71393 2020 PST | Mon Jan 01 00:00:00 2001 PST | Tue Feb 11 15:44:17.71375 2020 PST
(7 rows)

-- shift bins using the origin parameter:
SELECT date_bin('5 min'::interval, timestamptz '2020-02-01 01:01:01+00', timestamptz '2020-02-01 00:02:30+00');
date_bin
------------------------------
Fri Jan 31 16:57:30 2020 PST
(1 row)

-- disallow intervals with months or years
SELECT date_bin('5 months'::interval, timestamp with time zone '2020-02-01 01:01:01+00', timestamp with time zone '2001-01-01+00');
ERROR: timestamps cannot be binned into intervals containing months or years
SELECT date_bin('5 years'::interval, timestamp with time zone '2020-02-01 01:01:01+00', timestamp with time zone '2001-01-01+00');
ERROR: timestamps cannot be binned into intervals containing months or years
-- Test casting within a BETWEEN qualifier
SELECT d1 - timestamp with time zone '1997-01-02' AS diff
FROM TIMESTAMPTZ_TBL
Expand Down

0 comments on commit 49ab61f

Please sign in to comment.