WIP just to get it off my laptop: one test still failing
pjungwir committed Nov 29, 2017
0 parents commit b014627
Showing 10 changed files with 506 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
*.s
*.o
*.so
.*.swp
.*.swo
/pgtemporal*.zip
/regression.diffs
/regression.out
/results
/README.html
20 changes: 20 additions & 0 deletions Makefile
@@ -0,0 +1,20 @@
MODULE_big = pgtemporal
EXTENSION = pgtemporal
EXTENSION_VERSION = 0.0.1
DATA = $(EXTENSION)--$(EXTENSION_VERSION).sql
# REGRESS = $(EXTENSION)_test
REGRESS = completely_covers_test
OBJS = pgtemporal.o completely_covers.o $(WIN32RES)

PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

README.html: README.md
	jq --slurp --raw-input '{"text": "\(.)", "mode": "markdown"}' < README.md | curl --data @- https://api.github.com/markdown > README.html

release:
	git archive --format zip --prefix=$(EXTENSION)-$(EXTENSION_VERSION)/ --output $(EXTENSION)-$(EXTENSION_VERSION).zip master

.PHONY: release

17 changes: 17 additions & 0 deletions README.md
@@ -0,0 +1,17 @@
pgtemporal
==========

temporal foreign key constraint
-------------------------------

Postgres has no built-in support for this, so we need to use triggers to check it.
Suppose we have tables `houses` and `rooms`, where `rooms` has a `house_id` column that references `houses`.
Whenever a `rooms` record is inserted or updated,
we need to make sure that the corresponding record(s) in `houses` "cover" the duration of that `rooms` record.

Also, whenever `houses` is updated or deleted, we need to re-check all the `rooms` that reference that house.

If we have a function to validate one `rooms` record, we can use it in several of these cases.
This will be `rooms_house_id_is_covered.sql`.
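
Such a check isn't shown in this commit; a rough sketch (assuming `houses` has an `id` primary key and both tables carry a `valid_at` tstzrange column) could lean on the `completely_covers` aggregate this extension provides:

-- Hypothetical sketch, not part of this commit: do the referenced house rows
-- completely cover this one room row's valid_at range?
CREATE FUNCTION rooms_house_id_is_covered(room rooms)
RETURNS boolean
AS $$
  SELECT COALESCE(
    (SELECT completely_covers(h.valid_at, room.valid_at ORDER BY h.valid_at)
     FROM   houses h
     WHERE  h.id = room.house_id),
    false);
$$ LANGUAGE sql;

An insert/update trigger on `rooms` (and matching update/delete triggers on `houses`) could then raise an error whenever this function returns false.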


192 changes: 192 additions & 0 deletions completely_covers.c
@@ -0,0 +1,192 @@
/**
 * completely_covers.c -
 * Provides an aggregate function
 * that tells whether a bunch of input ranges completely cover a target range.
 */

#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <string.h>
#include <time.h>

#include <postgres.h>
#include <fmgr.h>
#include <pg_config.h>
#include <miscadmin.h>
#include <utils/array.h>
#include <utils/guc.h>
#include <utils/acl.h>
#include <utils/lsyscache.h>
#include <utils/builtins.h>
#include <utils/rangetypes.h>
#include <utils/timestamp.h>
#include <catalog/pg_type.h>
#include <catalog/catalog.h>
#include <catalog/pg_tablespace.h>
#include <commands/tablespace.h>


PG_MODULE_MAGIC;


#include "completely_covers.h"

typedef struct completely_covers_state {
  TimestampTz covered_to;
  TimestampTz target_start;
  TimestampTz target_end;
  bool target_start_unbounded;
  bool target_end_unbounded;
  bool answer_is_null;
  bool finished;   // Used to avoid further processing if we have already succeeded/failed.
  bool completely_covered;
} completely_covers_state;


Datum completely_covers_transfn(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(completely_covers_transfn);

Datum completely_covers_transfn(PG_FUNCTION_ARGS)
{
  MemoryContext aggContext;
  completely_covers_state *state;
  RangeType *current_range,
            *target_range;
  RangeBound current_start, current_end, target_start, target_end;
  TypeCacheEntry *typcache;
  bool current_empty, target_empty;
  bool first_time;

  if (!AggCheckCallContext(fcinfo, &aggContext)) {
    elog(ERROR, "completely_covers called in non-aggregate context");
  }

  if (PG_ARGISNULL(0)) {
    // Need to use MemoryContextAlloc with aggContext, not just palloc0,
    // or the state will get cleared in between invocations:
    state = (completely_covers_state *)MemoryContextAlloc(aggContext, sizeof(completely_covers_state));
    state->finished = false;
    state->completely_covered = false;
    first_time = true;

    // Need to find out the target range:

    // TODO: Technically this will fail to detect an inconsistent target
    // if only the first row is NULL:
    if (PG_ARGISNULL(2)) {
      // return NULL from the whole thing
      state->answer_is_null = true;
      state->finished = true;
      PG_RETURN_POINTER(state);
    }
    state->answer_is_null = false;

    target_range = PG_GETARG_RANGE(2);
    typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range));
    range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty);

    state->target_start_unbounded = target_start.infinite;
    state->target_end_unbounded = target_end.infinite;
    state->target_start = DatumGetTimestampTz(target_start.val);
    state->target_end = DatumGetTimestampTz(target_end.val);
    // ereport(NOTICE, (errmsg("STARTING: state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val))));

    state->covered_to = 0;

  } else {
    // ereport(NOTICE, (errmsg("looking up state....")));
    state = (completely_covers_state *)PG_GETARG_POINTER(0);

    // TODO: Is there any better way to exit an aggregation early?
    // Even https://pgxn.org/dist/first_last_agg/ hits all the input rows:
    if (state->finished) PG_RETURN_POINTER(state);

    first_time = false;

    // Make sure the second arg is always the same:
    if (PG_ARGISNULL(2)) {
      ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group")));
    }
    target_range = PG_GETARG_RANGE(2);
    typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range));
    range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty);

    // ereport(NOTICE, (errmsg("state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val))));
    if (DatumGetTimestampTz(target_start.val) != state->target_start || DatumGetTimestampTz(target_end.val) != state->target_end) {
      ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group")));
    }
  }

  if (PG_ARGISNULL(1)) PG_RETURN_POINTER(state);
  current_range = PG_GETARG_RANGE(1);
  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(current_range));
  range_deserialize(typcache, current_range, &current_start, &current_end, &current_empty);

  // TODO: Deal with current empty/lower-infinite/upper-infinite (tests too)

  // ereport(NOTICE, (errmsg("current is [%ld, %ld)", DatumGetTimestampTz(current_start.val), DatumGetTimestampTz(current_end.val))));

  if (first_time) {
    if (state->target_start_unbounded && !current_start.infinite) {
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }
    if (DatumGetTimestampTz(current_start.val) > state->target_start) {
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }

  } else {
    // If there is a gap then fail:
    if (DatumGetTimestampTz(current_start.val) > state->covered_to) {
      // ereport(NOTICE, (errmsg("found a gap")));
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }
  }

  // This check is why we set covered_to to 0 above on the first pass:
  if (DatumGetTimestampTz(current_start.val) < state->covered_to) {
    // Right? Maybe this should be a warning....
    ereport(ERROR, (errmsg("completely_covered first argument should not have overlaps")));
  }

  if (current_end.infinite) {
    state->completely_covered = true;
    state->finished = true;
    PG_RETURN_POINTER(state);
  }

  state->covered_to = DatumGetTimestampTz(current_end.val);

  if (!state->target_end_unbounded && state->covered_to >= state->target_end) {
    state->completely_covered = true;
    state->finished = true;
  }

  PG_RETURN_POINTER(state);
}

Datum completely_covers_finalfn(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(completely_covers_finalfn);

Datum completely_covers_finalfn(PG_FUNCTION_ARGS)
{
  completely_covers_state *state;

  if (PG_ARGISNULL(0)) PG_RETURN_NULL();

  state = (completely_covers_state *)PG_GETARG_POINTER(0);
  if (state->answer_is_null) {
    PG_RETURN_NULL();
  } else {
    PG_RETURN_BOOL(state->completely_covered);
  }
}
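
For context, the extension's SQL script (`pgtemporal--0.0.1.sql`, referenced from the Makefile but not shown in this view) has to expose these two C functions as an aggregate. A minimal sketch of that wiring, with the exact declarations treated as assumptions, might look like:

-- Hypothetical aggregate wiring; the real pgtemporal--0.0.1.sql is not shown here.
CREATE FUNCTION completely_covers_transfn(internal, tstzrange, tstzrange)
RETURNS internal
AS 'pgtemporal', 'completely_covers_transfn'
LANGUAGE c;

CREATE FUNCTION completely_covers_finalfn(internal)
RETURNS boolean
AS 'pgtemporal', 'completely_covers_finalfn'
LANGUAGE c;

CREATE AGGREGATE completely_covers(tstzrange, tstzrange) (
  sfunc     = completely_covers_transfn,
  stype     = internal,
  finalfunc = completely_covers_finalfn
);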
Empty file added completely_covers.h
Empty file.
142 changes: 142 additions & 0 deletions expected/completely_covers_test.out
@@ -0,0 +1,142 @@
CREATE EXTENSION pgtemporal;
CREATE TABLE shifts (job_id INTEGER, worker_id INTEGER, valid_at tstzrange);
INSERT INTO shifts VALUES
(1, 1, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')),
(1, 2, tstzrange('2017-11-27 12:00:00', '2017-11-27 17:00:00')),
(2, 3, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')),
(2, 4, tstzrange('2017-11-27 13:00:00', '2017-11-27 17:00:00'))
;
-- TRUE:
-- it covers when the range matches one exactly:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range matches two exactly:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra in front:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra behind:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra on both sides:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- FALSE:
-- it does not cover when the range is null:
SELECT completely_covers(NULL, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range misses completely:
SELECT completely_covers(valid_at, tstzrange('2017-11-29 08:00:00', '2017-11-29 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something at the beginning:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 04:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something at the end:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 20:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something in the middle:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 2;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is lower-unbounded:
SELECT completely_covers(valid_at, tstzrange(NULL, '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is upper-unbounded:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', NULL))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is both-sides-unbounded:
SELECT completely_covers(valid_at, tstzrange(NULL, NULL))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- NULL:
-- it is unknown when the target is null:
SELECT completely_covers(valid_at, null)
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------

(1 row)

-- Errors:
-- it fails if the input ranges go backwards:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00') ORDER BY worker_id DESC)
FROM shifts
WHERE job_id = 1;
