Skip to content

Commit

Permalink
Move to using macro for time_bucket_ts
Browse files Browse the repository at this point in the history
Macro is used for 2 reasons:
1) It's more correct in that it doesn't mix Timestamp and TimestampTz
types. There is no implicit conversion of the two beneath the hood.
2) It is slightly faster as it avoids an extra function call. This
is a very performance-sensitive function for OLAP queries.
  • Loading branch information
cevian committed Oct 15, 2018
1 parent 297d885 commit debd914
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 121 deletions.
184 changes: 75 additions & 109 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,66 +275,74 @@ get_interval_period_timestamp_units(Interval *interval)
#endif
}

/*
 * JAN_3_2000 is a two-day offset, expressed in whichever per-day unit the
 * build selects: USECS_PER_DAY when HAVE_INT64_TIMESTAMP is defined,
 * SECS_PER_DAY otherwise.
 */
#ifdef HAVE_INT64_TIMESTAMP
#define JAN_3_2000 (2 * USECS_PER_DAY)
#else
#define JAN_3_2000 (2 * SECS_PER_DAY)
#endif

/*
 * The default origin is Monday 2000-01-03. We don't use the PG epoch since it
 * starts on a Saturday. Using a Monday makes time-buckets by a week more
 * intuitive and aligns them with date_trunc.
 */
#define DEFAULT_ORIGIN (JAN_3_2000)
/*
 * TIME_BUCKET_TS(period, timestamp, result, shift)
 *
 * Statement macro: computes the start of the bucket of width `period` that
 * contains `timestamp`, with bucket boundaries aligned to `shift`, and stores
 * it in `result`.
 *
 * Every argument is expanded more than once, so pass plain variables with no
 * side effects. `timestamp`, `result` and `shift` must be modifiable lvalues
 * and do NOT keep their input values: `shift` is reduced modulo `period`, and
 * `timestamp` has the reduced shift subtracted and is then reduced to the
 * remainder of the division by `period` (via TMODULO).
 *
 * Errors raised through ereport(ERROR, ...):
 *   - ERRCODE_INVALID_PARAMETER_VALUE when period <= 0;
 *   - ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE when subtracting the reduced shift
 *     would carry the timestamp past DT_NOBEGIN or DT_NOEND.
 *
 * Each use of a macro parameter is parenthesized so that an expression
 * argument cannot change operator grouping inside the expansion.
 *
 * NOTE(review): the message text "greater then" is kept verbatim; regression
 * expected-output files may match on it -- confirm before correcting the typo.
 */
#define TIME_BUCKET_TS(period, timestamp, result, shift) \
	do \
	{ \
		if ((period) <= 0) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("period must be greater then 0"))); \
		/* shift = shift % period, but use TMODULO */ \
		TMODULO(shift, result, period); \
		\
		/* refuse to subtract the shift if that would leave the valid range */ \
		if (((shift) > 0 && (timestamp) < DT_NOBEGIN + (shift)) \
			|| ((shift) < 0 && (timestamp) > DT_NOEND + (shift))) \
			ereport(ERROR, \
					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), \
					 errmsg("timestamp out of range"))); \
		(timestamp) -= (shift); \
		\
		/* result = (timestamp / period) * period */ \
		TMODULO(timestamp, result, period); \
		if ((timestamp) < 0) \
		{ \
			/* \
			 * need to subtract another period if remainder < 0 this only happens \
			 * if timestamp is negative to begin with and there is a remainder \
			 * after division. Need to subtract another period since division \
			 * truncates toward 0 in C99. \
			 */ \
			(result) = ((result) * (period)) - (period); \
		} \
		else \
			(result) *= (period); \
		\
		/* move the bucket start back into the caller's frame of reference */ \
		(result) += (shift); \
	} while (0)


/*
 * ts_timestamp_bucket: SQL-callable function that buckets a Timestamp
 * (argument 1) by an Interval (argument 0), optionally aligned to an origin
 * (argument 2), and returns the bucket start as a Timestamp. Non-finite
 * timestamps are returned unchanged.
 *
 * NOTE(review): this span comes from a rendered commit diff without +/-
 * markers. Two versions of the body appear interleaved -- one with inline
 * origin/overflow/TMODULO logic, one using DEFAULT_ORIGIN and TIME_BUCKET_TS
 * -- so origin, result and period are each declared twice and the text is not
 * compilable as shown. Presumably the inline version is the removed side;
 * confirm against the repository before editing.
 */
TS_FUNCTION_INFO_V1(ts_timestamp_bucket);
Datum
ts_timestamp_bucket(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
Timestamp origin;
Timestamp result;
int64 period = -1;

if (PG_NARGS() > 2)
/* NOTE(review): origin is declared Timestamp but is read here with the TimestampTz accessor */
origin = PG_GETARG_TIMESTAMPTZ(2);
else
{
/*
* The default origin moves the PG epoch to start on a monday:
* 2000-01-03. We don't use PG epoch since it starts on a saturday.
* This makes time-buckets by a week more intuitive and aligns it with
* date_trunc.
*/
#ifdef HAVE_INT64_TIMESTAMP
origin = 2 * USECS_PER_DAY;
#else
origin = 2 * SECS_PER_DAY;
#endif
}
/*
* USE NARGS and not IS_NULL to differentiate a NULL argument from a call
* with 2 parameters
*/
Timestamp origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMP(2) : DEFAULT_ORIGIN);
Timestamp result;
int64 period = get_interval_period_timestamp_units(interval);

/* infinite timestamps cannot be bucketed; hand them back as-is */
if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMP(timestamp);

/* guard the subtraction of origin below against leaving the valid range */
if (origin > 0 && timestamp < DT_NOBEGIN + origin)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));

if (origin < 0 && timestamp > DT_NOEND + origin)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));

timestamp -= origin;
/* macro form: validates period, range-checks, and writes the bucket start to result */
TIME_BUCKET_TS(period, timestamp, result, origin);

period = get_interval_period_timestamp_units(interval);
/* result = (timestamp / period) * period */
TMODULO(timestamp, result, period);
if (timestamp < 0)
{
/*
* need to subtract another period if remainder < 0 this only happens
* if timestamp is negative to begin with and there is a remainder
* after division. Need to subtract another period since division
* truncates toward 0 in C99.
*/
result = (result * period) - period;
}
else
{
result *= period;
}
/* undo the earlier origin subtraction */
result += origin;
PG_RETURN_TIMESTAMP(result);
}

Expand All @@ -344,61 +352,20 @@ ts_timestamptz_bucket(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
TimestampTz origin;
TimestampTz result;
int64 period = -1;


if (PG_NARGS() > 2)
origin = PG_GETARG_TIMESTAMPTZ(2);
else
{
/*
* The default origin moves the PG epoch to start on a monday:
* 2000-01-03. We don't use PG epoch since it starts on a saturday.
* This makes time-buckets by a week more intuitive and aligns it with
* date_trunc.
*/
#ifdef HAVE_INT64_TIMESTAMP
origin = 2 * USECS_PER_DAY;
#else
origin = 2 * SECS_PER_DAY;
#endif
}
/*
* USE NARGS and not IS_NULL to differentiate a NULL argument from a call
* with 2 parameters
*/
TimestampTz origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMPTZ(2) : DEFAULT_ORIGIN);
TimestampTz result;
int64 period = get_interval_period_timestamp_units(interval);

if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMPTZ(timestamp);

if (origin > 0 && timestamp < DT_NOBEGIN + origin)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
TIME_BUCKET_TS(period, timestamp, result, origin);

if (origin < 0 && timestamp > DT_NOEND + origin)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));

timestamp -= origin;

period = get_interval_period_timestamp_units(interval);
/* result = (timestamp / period) * period */
TMODULO(timestamp, result, period);
if (timestamp < 0)
{
/*
* need to subtract another period if remainder < 0 this only happens
* if timestamp is negative to begin with and there is a remainder
* after division. Need to subtract another period since division
* truncates toward 0 in C99.
*/
result = (result * period) - period;
}
else
{
result *= period;
}
result += origin;
PG_RETURN_TIMESTAMPTZ(result);
}

Expand Down Expand Up @@ -434,8 +401,9 @@ ts_date_bucket(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
DateADT date = PG_GETARG_DATEADT(1);
Datum converted_ts,
bucketed;
Timestamp origin = DEFAULT_ORIGIN;
Timestamp timestamp,
result;
int64 period = -1;

if (DATE_NOT_FINITE(date))
Expand All @@ -446,19 +414,17 @@ ts_date_bucket(PG_FUNCTION_ARGS)
check_period_is_daily(period);

/* convert to timestamp (NOT tz), bucket, convert back to date */
converted_ts = DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1));
timestamp = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1)));
if (PG_NARGS() > 2)
{
Datum converted_ts_origin;

converted_ts_origin = DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(2));
bucketed = DirectFunctionCall3(ts_timestamp_bucket, PG_GETARG_DATUM(0), converted_ts, converted_ts_origin);
}
else
{
bucketed = DirectFunctionCall2(ts_timestamp_bucket, PG_GETARG_DATUM(0), converted_ts);
origin = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(2)));
}
return DirectFunctionCall1(timestamp_date, bucketed);

Assert(!TIMESTAMP_NOT_FINITE(timestamp));

TIME_BUCKET_TS(period, timestamp, result, origin);

PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result)));
}

/* Returns approximate period in microseconds */
Expand Down
12 changes: 6 additions & 6 deletions test/expected/timestamp.out
Original file line number Diff line number Diff line change
Expand Up @@ -1165,24 +1165,24 @@ FROM unnest(ARRAY[
(5 rows)

\set ON_ERROR_STOP 0
--really old origin + very new data errors
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp without time zone '4710-11-24 01:01:01.0 BC')
--really old origin + very new data + long period errors
SELECT time, time_bucket(INTERVAL '100000 day', time, timestamp without time zone '4710-11-24 01:01:01.0 BC')
FROM unnest(ARRAY[
timestamp without time zone '294270-12-31 23:59:59.9999'
]) AS time;
ERROR: timestamp out of range
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp with time zone '4710-11-25 01:01:01.0 BC')
SELECT time, time_bucket(INTERVAL '100000 day', time, timestamp with time zone '4710-11-25 01:01:01.0 BC')
FROM unnest(ARRAY[
timestamp with time zone '294270-12-30 23:59:59.9999'
]) AS time;
ERROR: timestamp out of range
--really high origin + old data errors out
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp without time zone '294270-12-31 23:59:59.9999')
--really high origin + old data + long period errors out
SELECT time, time_bucket(INTERVAL '10000000 day', time, timestamp without time zone '294270-12-31 23:59:59.9999')
FROM unnest(ARRAY[
timestamp without time zone '4710-11-24 01:01:01.0 BC'
]) AS time;
ERROR: timestamp out of range
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp with time zone '294270-12-31 23:59:59.9999')
SELECT time, time_bucket(INTERVAL '10000000 day', time, timestamp with time zone '294270-12-31 23:59:59.9999')
FROM unnest(ARRAY[
timestamp with time zone '4710-11-24 01:01:01.0 BC'
]) AS time;
Expand Down
12 changes: 6 additions & 6 deletions test/sql/timestamp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -559,22 +559,22 @@ FROM unnest(ARRAY[
]) AS time;

\set ON_ERROR_STOP 0
--really old origin + very new data errors
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp without time zone '4710-11-24 01:01:01.0 BC')
--really old origin + very new data + long period errors
SELECT time, time_bucket(INTERVAL '100000 day', time, timestamp without time zone '4710-11-24 01:01:01.0 BC')
FROM unnest(ARRAY[
timestamp without time zone '294270-12-31 23:59:59.9999'
]) AS time;
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp with time zone '4710-11-25 01:01:01.0 BC')
SELECT time, time_bucket(INTERVAL '100000 day', time, timestamp with time zone '4710-11-25 01:01:01.0 BC')
FROM unnest(ARRAY[
timestamp with time zone '294270-12-30 23:59:59.9999'
]) AS time;

--really high origin + old data errors out
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp without time zone '294270-12-31 23:59:59.9999')
--really high origin + old data + long period errors out
SELECT time, time_bucket(INTERVAL '10000000 day', time, timestamp without time zone '294270-12-31 23:59:59.9999')
FROM unnest(ARRAY[
timestamp without time zone '4710-11-24 01:01:01.0 BC'
]) AS time;
SELECT time, time_bucket(INTERVAL '1 week', time, timestamp with time zone '294270-12-31 23:59:59.9999')
SELECT time, time_bucket(INTERVAL '10000000 day', time, timestamp with time zone '294270-12-31 23:59:59.9999')
FROM unnest(ARRAY[
timestamp with time zone '4710-11-24 01:01:01.0 BC'
]) AS time;
Expand Down

0 comments on commit debd914

Please sign in to comment.