diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d796b08 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.s +*.o +*.so +.*.swp +.*.swo +/pgtemporal*.zip +/regression.diffs +/regression.out +/results +/README.html diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..38ac108 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +MODULE_big = pgtemporal +EXTENSION = pgtemporal +EXTENSION_VERSION = 0.0.1 +DATA = $(EXTENSION)--$(EXTENSION_VERSION).sql +# REGRESS = $(EXTENSION)_test +REGRESS = completely_covers_test +OBJS = pgtemporal.o completely_covers.o $(WIN32RES) + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +README.html: README.md + jq --slurp --raw-input '{"text": "\(.)", "mode": "markdown"}' < README.md | curl --data @- https://api.github.com/markdown > README.html + +release: + git archive --format zip --prefix=$(EXTENSION)-$(EXTENSION_VERSION)/ --output $(EXTENSION)-$(EXTENSION_VERSION).zip master + +.PHONY: release + diff --git a/README.md b/README.md new file mode 100644 index 0000000..4cc60e2 --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +pgtemporal +========== + +temporal foreign key constraint +------------------------------- + +There isn't anything about this built-in, so we need to use triggers to check for it. +Suppose tables `houses` and `rooms` where `rooms` has a `house_id` column that references `houses`. +Whenever `rooms` is inserted or updated, +we need to make sure that the corresponding record(s) from `houses` "cover" the duration of that record from `rooms`. + +Also whenever `houses` is updated or deleted, we need to check all the `rooms` that reference that house. + +If we have a method to validate one `room` record, we can use that in several of these cases. +This will be [`rooms_house_id_is_covered.sql`]. + + diff --git a/completely_covers.c b/completely_covers.c new file mode 100644 index 0000000..538573b --- /dev/null +++ b/completely_covers.c @@ -0,0 +1,192 @@ +/** + * completely_covers.c - + * Provides an aggregate function + * that tells whether a bunch of input ranges competely cover a target range. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +PG_MODULE_MAGIC; + + +#include "completely_covers.h" + +typedef struct completely_covers_state { + TimestampTz covered_to; + TimestampTz target_start; + TimestampTz target_end; + bool target_start_unbounded; + bool target_end_unbounded; + bool answer_is_null; + bool finished; // Used to avoid further processing if we have already succeeded/failed. + bool completely_covered; +} completely_covers_state; + + +Datum completely_covers_transfn(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(completely_covers_transfn); + +Datum completely_covers_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + completely_covers_state *state; + RangeType *current_range, + *target_range; + RangeBound current_start, current_end, target_start, target_end; + TypeCacheEntry *typcache; + bool current_empty, target_empty; + bool first_time; + + if (!AggCheckCallContext(fcinfo, &aggContext)) { + elog(ERROR, "completely_covers called in non-aggregate context"); + } + + if (PG_ARGISNULL(0)) { + // Need to use MemoryContextAlloc with aggContext, not just palloc0, + // or the state will get cleared in between invocations: + state = (completely_covers_state *)MemoryContextAlloc(aggContext, sizeof(completely_covers_state)); + state->finished = false; + state->completely_covered = false; + first_time = true; + + // Need to find out the target range: + + // TODO: Technically this will fail to detect an inconsistent target + // if only the first row is NULL: + if (PG_ARGISNULL(2)) { + // return NULL from the whole thing + state->answer_is_null = true; + state->finished = true; + PG_RETURN_POINTER(state); + } + state->answer_is_null = false; + + target_range = PG_GETARG_RANGE(2); + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range)); + range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty); + + state->target_start_unbounded = target_start.infinite; + state->target_end_unbounded = target_end.infinite; + state->target_start = DatumGetTimestampTz(target_start.val); + state->target_end = DatumGetTimestampTz(target_end.val); + // ereport(NOTICE, (errmsg("STARTING: state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val)))); + + state->covered_to = 0; + + } else { + // ereport(NOTICE, (errmsg("looking up state...."))); + state = (completely_covers_state *)PG_GETARG_POINTER(0); + + // TODO: Is there any better way to exit an aggregation early? + // Even https://pgxn.org/dist/first_last_agg/ hits all the input rows: + if (state->finished) PG_RETURN_POINTER(state); + + first_time = false; + + // Make sure the second arg is always the same: + if (PG_ARGISNULL(2)) { + ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group"))); + } + target_range = PG_GETARG_RANGE(2); + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range)); + range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty); + + // ereport(NOTICE, (errmsg("state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val)))); + if (DatumGetTimestampTz(target_start.val) != state->target_start || DatumGetTimestampTz(target_end.val) != state->target_end) { + ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group"))); + } + } + + if (PG_ARGISNULL(1)) PG_RETURN_POINTER(state); + current_range = PG_GETARG_RANGE(1); + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(current_range)); + range_deserialize(typcache, current_range, ¤t_start, ¤t_end, ¤t_empty); + + // TODO: Deal with current empty/lower-infinite/upper-infinite (tests too) + + // ereport(NOTICE, (errmsg("current is [%ld, %ld)", DatumGetTimestampTz(current_start.val), DatumGetTimestampTz(current_end.val)))); + + if (first_time) { + if (state->target_start_unbounded && !current_start.infinite) { + state->finished = true; + state->completely_covered = false; + PG_RETURN_POINTER(state); + } + if (DatumGetTimestampTz(current_start.val) > state->target_start) { + state->finished = true; + state->completely_covered = false; + PG_RETURN_POINTER(state); + } + + } else { + // If there is a gap then fail: + if (DatumGetTimestampTz(current_start.val) > state->covered_to) { + // ereport(NOTICE, (errmsg("found a gap"))); + state->finished = true; + state->completely_covered = false; + PG_RETURN_POINTER(state); + } + } + + // This check is why we set covered_to to 0 above on the first pass: + if (DatumGetTimestampTz(current_start.val) < state->covered_to) { + // Right? Maybe this should be a warning.... + ereport(ERROR, (errmsg("completely_covered first argument should not have overlaps"))); + } + + if (current_end.infinite) { + state->completely_covered = true; + state->finished = true; + PG_RETURN_POINTER(state); + } + + state->covered_to = DatumGetTimestampTz(current_end.val); + + if (!state->target_end_unbounded && state->covered_to >= state->target_end) { + state->completely_covered = true; + state->finished = true; + } + + PG_RETURN_POINTER(state); +} + +Datum completely_covers_finalfn(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(completely_covers_finalfn); + +Datum completely_covers_finalfn(PG_FUNCTION_ARGS) +{ + completely_covers_state *state; + + if (PG_ARGISNULL(0)) PG_RETURN_NULL(); + + state = (completely_covers_state *)PG_GETARG_POINTER(0); + if (state->answer_is_null) { + PG_RETURN_NULL(); + } else { + PG_RETURN_BOOL(state->completely_covered); + } +} diff --git a/completely_covers.h b/completely_covers.h new file mode 100644 index 0000000..e69de29 diff --git a/expected/completely_covers_test.out b/expected/completely_covers_test.out new file mode 100644 index 0000000..8b45b89 --- /dev/null +++ b/expected/completely_covers_test.out @@ -0,0 +1,142 @@ +CREATE EXTENSION pgtemporal; +CREATE TABLE shifts (job_id INTEGER, worker_id INTEGER, valid_at tstzrange); +INSERT INTO shifts VALUES + (1, 1, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')), + (1, 2, tstzrange('2017-11-27 12:00:00', '2017-11-27 17:00:00')), + (2, 3, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')), + (2, 4, tstzrange('2017-11-27 13:00:00', '2017-11-27 17:00:00')) +; +-- TRUE: +-- it covers when the range matches one exactly: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + t +(1 row) + +-- it covers when the range matches two exactly: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + t +(1 row) + +-- it covers when the range has extra in front: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + t +(1 row) + +-- it covers when the range has extra behind: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + t +(1 row) + +-- it covers when the range has extra on both sides: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + t +(1 row) + +-- FALSE: +-- it does not cover when the range is null: +SELECT completely_covers(NULL, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range misses completely: +SELECT completely_covers(valid_at, tstzrange('2017-11-29 08:00:00', '2017-11-29 14:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range has something at the beginning: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 04:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range has something at the end: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 20:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range has something in the middle: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 2; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range is lower-unbounded: +SELECT completely_covers(valid_at, tstzrange(NULL, '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range is upper-unbounded: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', NULL)) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- it does not cover when the range is both-sides-unbounded: +SELECT completely_covers(valid_at, tstzrange(NULL, NULL)) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + f +(1 row) + +-- NULL: +-- it is unknown when the target is null: +SELECT completely_covers(valid_at, null) +FROM shifts +WHERE job_id = 1; + completely_covers +------------------- + +(1 row) + +-- Errors: +-- it fails if the input ranges go backwards: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00') ORDER BY worker_id DESC) +FROM shifts +WHERE job_id = 1; diff --git a/pgtemporal--0.0.1.sql b/pgtemporal--0.0.1.sql new file mode 100644 index 0000000..f752df9 --- /dev/null +++ b/pgtemporal--0.0.1.sql @@ -0,0 +1,26 @@ +/* pgtemporal--0.0.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pgtemporal" to load this file. \quit + + + +-- TODO: Make this generic for any range type?: +CREATE OR REPLACE FUNCTION +completely_covers_transfn(internal, tstzrange, tstzrange) +RETURNS internal +AS 'pgtemporal', 'completely_covers_transfn' +LANGUAGE c; + +CREATE OR REPLACE FUNCTION +completely_covers_finalfn(internal, tstzrange, tstzrange) +RETURNS boolean +AS 'pgtemporal', 'completely_covers_finalfn' +LANGUAGE c; + +CREATE AGGREGATE completely_covers(tstzrange, tstzrange) ( + sfunc = completely_covers_transfn, + stype = internal, + finalfunc = completely_covers_finalfn, + finalfunc_extra +); diff --git a/pgtemporal.c b/pgtemporal.c new file mode 100644 index 0000000..e69de29 diff --git a/pgtemporal.control b/pgtemporal.control new file mode 100644 index 0000000..816d6b8 --- /dev/null +++ b/pgtemporal.control @@ -0,0 +1,4 @@ +comment = 'Tools for state-time temporal tables' +default_version = '0.0.1' +module_pathname = '$libdir/pgtemporal' +relocatable = true diff --git a/sql/completely_covers_test.sql b/sql/completely_covers_test.sql new file mode 100644 index 0000000..729ede2 --- /dev/null +++ b/sql/completely_covers_test.sql @@ -0,0 +1,95 @@ +CREATE EXTENSION pgtemporal; +CREATE TABLE shifts (job_id INTEGER, worker_id INTEGER, valid_at tstzrange); +INSERT INTO shifts VALUES + (1, 1, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')), + (1, 2, tstzrange('2017-11-27 12:00:00', '2017-11-27 17:00:00')), + (2, 3, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')), + (2, 4, tstzrange('2017-11-27 13:00:00', '2017-11-27 17:00:00')) +; + +-- TRUE: + +-- it covers when the range matches one exactly: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it covers when the range matches two exactly: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it covers when the range has extra in front: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it covers when the range has extra behind: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it covers when the range has extra on both sides: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + +-- FALSE: + +-- it does not cover when the range is null: +SELECT completely_covers(NULL, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range misses completely: +SELECT completely_covers(valid_at, tstzrange('2017-11-29 08:00:00', '2017-11-29 14:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range has something at the beginning: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 04:00:00', '2017-11-27 14:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range has something at the end: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 20:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range has something in the middle: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 2; + +-- it does not cover when the range is lower-unbounded: +SELECT completely_covers(valid_at, tstzrange(NULL, '2017-11-27 17:00:00')) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range is upper-unbounded: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', NULL)) +FROM shifts +WHERE job_id = 1; + +-- it does not cover when the range is both-sides-unbounded: +SELECT completely_covers(valid_at, tstzrange(NULL, NULL)) +FROM shifts +WHERE job_id = 1; + +-- NULL: + +-- it is unknown when the target is null: +SELECT completely_covers(valid_at, null) +FROM shifts +WHERE job_id = 1; + +-- Errors: + +-- it fails if the input ranges go backwards: +SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00') ORDER BY worker_id DESC) +FROM shifts +WHERE job_id = 1; + +-- TODO: handle an empty target range? e.g. [5, 5) +-- Or maybe since that is a self-contradiction maybe ignore that case? +