Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow bucketing by month, year, century in time_bucket and time_bucket_gapfill #4641

Merged
merged 2 commits into from Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Expand Up @@ -10,6 +10,7 @@ accidentally triggering the load of a previous DB version.**
* #4374 Remove constified now() constraints from plan
* #4393 Support intervals with day component when constifying now()
* #4397 Support intervals with month component when constifying now()
* #4641 Allow bucketing by month in time_bucket

**Bugfixes**
* #4486 Adding boolean column with default value doesn't work on compressed table
Expand All @@ -18,7 +19,7 @@ accidentally triggering the load of a previous DB version.**
* #4416 Handle TRUNCATE TABLE on chunks

**Thanks**
@janko for reporting
@janko for reporting an issue when adding bool column with default value to compressed hypertable
@AlmiS for reporting error on `get_partition_hash` executed inside an IMMUTABLE function
@michaelkitson for reporting permission errors using default privileges on Continuous Aggregates
@jayadevanm for reporting error of TRUNCATE TABLE on compressed chunk
Expand Down
134 changes: 116 additions & 18 deletions src/time_bucket.c
Expand Up @@ -95,8 +95,8 @@ ts_int64_bucket(PG_FUNCTION_ARGS)

/*
* The default origin is Monday 2000-01-03. We don't use PG epoch since it starts on a saturday.
* This makes time-buckets by a week more intuitive and aligns it with
* date_trunc.
* This makes time-buckets by a week more intuitive and aligns it with date_trunc. Since month
* bucketing ignores the day component this makes origin for month buckets 2000-01-01.
*/
#define DEFAULT_ORIGIN (JAN_3_2000)
#define TIME_BUCKET_TS(period, timestamp, result, shift) \
Expand Down Expand Up @@ -134,6 +134,49 @@ ts_int64_bucket(PG_FUNCTION_ARGS)
(result) += (shift); \
} while (0)

static void
validate_month_bucket(Interval *interval)
{
/*
* Bucketing by a month and non-month cannot be mixed.
*/
Assert(interval->month);

if (interval->day || interval->time)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("month intervals cannot have day or time component")));
}
}

/*
* To bucket by month we get the year and month of a date and convert
* that to the nth month since origin. This allows us to treat month
* bucketing similar to int bucketing. During this process we ignore
* the day component and therefore only support bucketing by full months.
*/
static DateADT
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add some highlevel comments about this function?

bucket_month(int32 period, DateADT date, DateADT origin)
{
int32 year, month, day;
int32 result;

j2date(date + POSTGRES_EPOCH_JDATE, &year, &month, &day);
int32 timestamp = year * 12 + month - 1;

j2date(origin + POSTGRES_EPOCH_JDATE, &year, &month, &day);
int32 offset = year * 12 + month - 1;

TIME_BUCKET(period, timestamp, offset, PG_INT32_MIN, PG_INT32_MAX, result);

year = result / 12;
month = result % 12;
day = 1;

return date2j(year, month + 1, day) - POSTGRES_EPOCH_JDATE;
}

/* Returns the period in the same representation as Postgres Timestamps.
* Note that this is not our internal representation (microseconds).
* Always returns an exact value.*/
Expand Down Expand Up @@ -163,14 +206,32 @@ ts_timestamp_bucket(PG_FUNCTION_ARGS)
*/
Timestamp origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMP(2) : DEFAULT_ORIGIN);
Timestamp result;
int64 period = get_interval_period_timestamp_units(interval);

if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMP(timestamp);

TIME_BUCKET_TS(period, timestamp, result, origin);
if (interval->month)
{
DateADT origin_date = 0;
validate_month_bucket(interval);

DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1)));
if (origin != DEFAULT_ORIGIN)
origin_date =
DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampGetDatum(origin)));

date = bucket_month(interval->month, date, origin_date);

PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date)));
}
else
{
int64 period = get_interval_period_timestamp_units(interval);

TIME_BUCKET_TS(period, timestamp, result, origin);

PG_RETURN_TIMESTAMP(result);
PG_RETURN_TIMESTAMP(result);
}
}

TS_FUNCTION_INFO_V1(ts_timestamptz_bucket);
Expand All @@ -187,14 +248,37 @@ ts_timestamptz_bucket(PG_FUNCTION_ARGS)
*/
TimestampTz origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMPTZ(2) : DEFAULT_ORIGIN);
TimestampTz result;
int64 period = get_interval_period_timestamp_units(interval);

if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMPTZ(timestamp);

TIME_BUCKET_TS(period, timestamp, result, origin);
if (PG_NARGS() > 2)
{
origin = PG_GETARG_TIMESTAMPTZ(2);
}

if (interval->month)
{
DateADT origin_date = 0;
validate_month_bucket(interval);

DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1)));
if (origin != DEFAULT_ORIGIN)
origin_date =
DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampTzGetDatum(origin)));

date = bucket_month(interval->month, date, origin_date);

PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date)));
}
else
{
int64 period = get_interval_period_timestamp_units(interval);

PG_RETURN_TIMESTAMPTZ(result);
TIME_BUCKET_TS(period, timestamp, result, origin);

PG_RETURN_TIMESTAMPTZ(result);
}
}

static inline void
Expand Down Expand Up @@ -223,27 +307,41 @@ ts_date_bucket(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
DateADT date = PG_GETARG_DATEADT(1);
Timestamp origin = DEFAULT_ORIGIN;
DateADT origin = 0;
Timestamp origin_ts = DEFAULT_ORIGIN;
Timestamp timestamp, result;
int64 period = -1;

if (DATE_NOT_FINITE(date))
PG_RETURN_DATEADT(date);

period = get_interval_period_timestamp_units(interval);
/* check the period aligns on a date */
check_period_is_daily(period);

/* convert to timestamp (NOT tz), bucket, convert back to date */
timestamp = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1)));
Assert(!TIMESTAMP_NOT_FINITE(timestamp));

if (PG_NARGS() > 2)
origin = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(2)));
{
origin = PG_GETARG_DATEADT(2);
if (!interval->month)
origin_ts =
DatumGetTimestamp(DirectFunctionCall1(date_timestamp, DateADTGetDatum(origin)));
}

Assert(!TIMESTAMP_NOT_FINITE(timestamp));
if (interval->month)
{
validate_month_bucket(interval);

TIME_BUCKET_TS(period, timestamp, result, origin);
date = bucket_month(interval->month, date, origin);
PG_RETURN_DATEADT(date);
}
else
{
int64 period = get_interval_period_timestamp_units(interval);
/* check the period aligns on a date */
check_period_is_daily(period);

PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result)));
TIME_BUCKET_TS(period, timestamp, result, origin_ts);
PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result)));
}
}

/* when working with time_buckets stored in our catalog, we may not know ahead of time which
Expand Down
70 changes: 66 additions & 4 deletions test/expected/timestamp.out
Expand Up @@ -306,10 +306,10 @@ FROM unnest(ARRAY[
(10 rows)

\set ON_ERROR_STOP 0
SELECT time_bucket(INTERVAL '1 year',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: interval defined in terms of month, year, century etc. not supported
SELECT time_bucket(INTERVAL '1 month',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: interval defined in terms of month, year, century etc. not supported
SELECT time_bucket(INTERVAL '1 year 1d',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: month intervals cannot have day or time component
SELECT time_bucket(INTERVAL '1 month 1 minute',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: month intervals cannot have day or time component
\set ON_ERROR_STOP 1
SELECT time, time_bucket(INTERVAL '5 minute', time)
FROM unnest(ARRAY[
Expand Down Expand Up @@ -1223,6 +1223,68 @@ FROM unnest(ARRAY[
]) AS time;
ERROR: timestamp out of range
\set ON_ERROR_STOP 1
-------------------------------------------
--- Test time_bucket with month periods ---
-------------------------------------------
SET datestyle TO ISO;
SELECT
time::date,
time_bucket('1 month', time::date) AS "1m",
time_bucket('2 month', time::date) AS "2m",
time_bucket('3 month', time::date) AS "3m",
time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin",
time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin",
time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin"
FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
------------+------------+------------+------------+------------+------------+------------
1990-01-03 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1989-12-01 | 1989-11-01
1990-02-03 | 1990-02-01 | 1990-01-01 | 1990-01-01 | 1990-02-01 | 1990-02-01 | 1990-02-01
1990-03-03 | 1990-03-01 | 1990-03-01 | 1990-01-01 | 1990-03-01 | 1990-02-01 | 1990-02-01
1990-04-03 | 1990-04-01 | 1990-03-01 | 1990-04-01 | 1990-04-01 | 1990-04-01 | 1990-02-01
1990-05-03 | 1990-05-01 | 1990-05-01 | 1990-04-01 | 1990-05-01 | 1990-04-01 | 1990-05-01
1990-06-03 | 1990-06-01 | 1990-05-01 | 1990-04-01 | 1990-06-01 | 1990-06-01 | 1990-05-01
(6 rows)

SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin"
FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+---------------------
1990-01-03 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1989-12-01 00:00:00 | 1989-11-01 00:00:00
1990-02-03 00:00:00 | 1990-02-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00
1990-03-03 00:00:00 | 1990-03-01 00:00:00 | 1990-03-01 00:00:00 | 1990-01-01 00:00:00 | 1990-03-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00
1990-04-03 00:00:00 | 1990-04-01 00:00:00 | 1990-03-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-02-01 00:00:00
1990-05-03 00:00:00 | 1990-05-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00
1990-06-03 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-06-01 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00
(6 rows)

SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin"
FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------
1990-01-03 00:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-11-30 19:00:00-05 | 1989-10-31 19:00:00-05
1990-02-03 00:00:00-05 | 1990-01-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-03-03 00:00:00-05 | 1990-02-28 19:00:00-05 | 1990-02-28 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-04-03 00:00:00-04 | 1990-03-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-05-03 00:00:00-04 | 1990-04-30 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04
1990-06-03 00:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-05-31 20:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04
(6 rows)

RESET datestyle;
------------------------------------------------------------
--- Test timescaledb_experimental.time_bucket_ng function --
------------------------------------------------------------
Expand Down
42 changes: 40 additions & 2 deletions test/sql/timestamp.sql
Expand Up @@ -181,8 +181,8 @@ FROM unnest(ARRAY[
]) AS int_def;

\set ON_ERROR_STOP 0
SELECT time_bucket(INTERVAL '1 year',TIMESTAMP '2011-01-02 01:01:01.111');
SELECT time_bucket(INTERVAL '1 month',TIMESTAMP '2011-01-02 01:01:01.111');
SELECT time_bucket(INTERVAL '1 year 1d',TIMESTAMP '2011-01-02 01:01:01.111');
SELECT time_bucket(INTERVAL '1 month 1 minute',TIMESTAMP '2011-01-02 01:01:01.111');
\set ON_ERROR_STOP 1

SELECT time, time_bucket(INTERVAL '5 minute', time)
Expand Down Expand Up @@ -598,6 +598,44 @@ FROM unnest(ARRAY[
]) AS time;
\set ON_ERROR_STOP 1

-------------------------------------------
--- Test time_bucket with month periods ---
-------------------------------------------

SET datestyle TO ISO;

SELECT
time::date,
time_bucket('1 month', time::date) AS "1m",
time_bucket('2 month', time::date) AS "2m",
time_bucket('3 month', time::date) AS "3m",
time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin",
time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin",
time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin"
FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time;

SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin"
FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time;

SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin"
FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time;

RESET datestyle;

------------------------------------------------------------
--- Test timescaledb_experimental.time_bucket_ng function --
------------------------------------------------------------
Expand Down