WIP just to get it off my laptop: one test still failing
pjungwir committed Nov 29, 2017
0 parents commit b014627
Showing 10 changed files with 506 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
*.s
*.o
*.so
.*.swp
.*.swo
/pgtemporal*.zip
/regression.diffs
/regression.out
/results
/README.html
20 changes: 20 additions & 0 deletions Makefile
@@ -0,0 +1,20 @@
MODULE_big = pgtemporal
EXTENSION = pgtemporal
EXTENSION_VERSION = 0.0.1
DATA = $(EXTENSION)--$(EXTENSION_VERSION).sql
# REGRESS = $(EXTENSION)_test
REGRESS = completely_covers_test
OBJS = pgtemporal.o completely_covers.o $(WIN32RES)

PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

README.html: README.md
	jq --slurp --raw-input '{"text": "\(.)", "mode": "markdown"}' < README.md | curl --data @- https://api.github.com/markdown > README.html

release:
	git archive --format zip --prefix=$(EXTENSION)-$(EXTENSION_VERSION)/ --output $(EXTENSION)-$(EXTENSION_VERSION).zip master

.PHONY: release

17 changes: 17 additions & 0 deletions README.md
@@ -0,0 +1,17 @@
pgtemporal
==========

temporal foreign key constraint
-------------------------------

Postgres has no built-in support for this, so we need to use triggers to check it.
Suppose we have tables `houses` and `rooms`, where `rooms` has a `house_id` column that references `houses`.
Whenever a `rooms` record is inserted or updated,
we need to make sure that the corresponding record(s) in `houses` "cover" the duration of that `rooms` record.

Also, whenever `houses` is updated or deleted, we need to re-check all the `rooms` that reference that house.

If we have a function to validate one `rooms` record, we can use it in several of these cases.
This will be `rooms_house_id_is_covered.sql`.
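
Such a check isn't shown in this commit; a rough sketch (assuming `houses` has an `id` primary key and both tables carry a `valid_at` tstzrange column) could lean on the `completely_covers` aggregate this extension provides:

-- Hypothetical sketch, not part of this commit: do the referenced house rows
-- completely cover this one room row's valid_at range?
CREATE FUNCTION rooms_house_id_is_covered(room rooms)
RETURNS boolean
AS $$
  SELECT COALESCE(
    (SELECT completely_covers(h.valid_at, room.valid_at ORDER BY h.valid_at)
     FROM   houses h
     WHERE  h.id = room.house_id),
    false);
$$ LANGUAGE sql;

An insert/update trigger on `rooms` (and matching update/delete triggers on `houses`) could then raise an error whenever this function returns false.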


192 changes: 192 additions & 0 deletions completely_covers.c
@@ -0,0 +1,192 @@
/**
 * completely_covers.c -
 * Provides an aggregate function
 * that tells whether a bunch of input ranges completely cover a target range.
 */

#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <string.h>
#include <time.h>

#include <postgres.h>
#include <fmgr.h>
#include <pg_config.h>
#include <miscadmin.h>
#include <utils/array.h>
#include <utils/guc.h>
#include <utils/acl.h>
#include <utils/lsyscache.h>
#include <utils/builtins.h>
#include <utils/rangetypes.h>
#include <utils/timestamp.h>
#include <catalog/pg_type.h>
#include <catalog/catalog.h>
#include <catalog/pg_tablespace.h>
#include <commands/tablespace.h>


PG_MODULE_MAGIC;


#include "completely_covers.h"

typedef struct completely_covers_state {
  TimestampTz covered_to;
  TimestampTz target_start;
  TimestampTz target_end;
  bool target_start_unbounded;
  bool target_end_unbounded;
  bool answer_is_null;
  bool finished;   // Used to avoid further processing if we have already succeeded/failed.
  bool completely_covered;
} completely_covers_state;


Datum completely_covers_transfn(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(completely_covers_transfn);

Datum completely_covers_transfn(PG_FUNCTION_ARGS)
{
  MemoryContext aggContext;
  completely_covers_state *state;
  RangeType *current_range,
            *target_range;
  RangeBound current_start, current_end, target_start, target_end;
  TypeCacheEntry *typcache;
  bool current_empty, target_empty;
  bool first_time;

  if (!AggCheckCallContext(fcinfo, &aggContext)) {
    elog(ERROR, "completely_covers called in non-aggregate context");
  }

  if (PG_ARGISNULL(0)) {
    // Need to use MemoryContextAlloc with aggContext, not just palloc0,
    // or the state will get cleared in between invocations:
    state = (completely_covers_state *)MemoryContextAlloc(aggContext, sizeof(completely_covers_state));
    state->finished = false;
    state->completely_covered = false;
    first_time = true;

    // Need to find out the target range:

    // TODO: Technically this will fail to detect an inconsistent target
    // if only the first row is NULL:
    if (PG_ARGISNULL(2)) {
      // return NULL from the whole thing
      state->answer_is_null = true;
      state->finished = true;
      PG_RETURN_POINTER(state);
    }
    state->answer_is_null = false;

    target_range = PG_GETARG_RANGE(2);
    typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range));
    range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty);

    state->target_start_unbounded = target_start.infinite;
    state->target_end_unbounded = target_end.infinite;
    state->target_start = DatumGetTimestampTz(target_start.val);
    state->target_end = DatumGetTimestampTz(target_end.val);
    // ereport(NOTICE, (errmsg("STARTING: state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val))));

    state->covered_to = 0;

  } else {
    // ereport(NOTICE, (errmsg("looking up state....")));
    state = (completely_covers_state *)PG_GETARG_POINTER(0);

    // TODO: Is there any better way to exit an aggregation early?
    // Even https://pgxn.org/dist/first_last_agg/ hits all the input rows:
    if (state->finished) PG_RETURN_POINTER(state);

    first_time = false;

    // Make sure the second arg is always the same:
    if (PG_ARGISNULL(2)) {
      ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group")));
    }
    target_range = PG_GETARG_RANGE(2);
    typcache = range_get_typcache(fcinfo, RangeTypeGetOid(target_range));
    range_deserialize(typcache, target_range, &target_start, &target_end, &target_empty);

    // ereport(NOTICE, (errmsg("state is [%ld, %ld) target is [%ld, %ld)", state->target_start, state->target_end, DatumGetTimestampTz(target_start.val), DatumGetTimestampTz(target_end.val))));
    if (DatumGetTimestampTz(target_start.val) != state->target_start || DatumGetTimestampTz(target_end.val) != state->target_end) {
      ereport(ERROR, (errmsg("completely_covers second argument must be constant across the group")));
    }
  }

  if (PG_ARGISNULL(1)) PG_RETURN_POINTER(state);
  current_range = PG_GETARG_RANGE(1);
  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(current_range));
  range_deserialize(typcache, current_range, &current_start, &current_end, &current_empty);

  // TODO: Deal with current empty/lower-infinite/upper-infinite (tests too)

  // ereport(NOTICE, (errmsg("current is [%ld, %ld)", DatumGetTimestampTz(current_start.val), DatumGetTimestampTz(current_end.val))));

  if (first_time) {
    if (state->target_start_unbounded && !current_start.infinite) {
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }
    if (DatumGetTimestampTz(current_start.val) > state->target_start) {
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }

  } else {
    // If there is a gap then fail:
    if (DatumGetTimestampTz(current_start.val) > state->covered_to) {
      // ereport(NOTICE, (errmsg("found a gap")));
      state->finished = true;
      state->completely_covered = false;
      PG_RETURN_POINTER(state);
    }
  }

  // This check is why we set covered_to to 0 above on the first pass:
  if (DatumGetTimestampTz(current_start.val) < state->covered_to) {
    // Right? Maybe this should be a warning....
    ereport(ERROR, (errmsg("completely_covered first argument should not have overlaps")));
  }

  if (current_end.infinite) {
    state->completely_covered = true;
    state->finished = true;
    PG_RETURN_POINTER(state);
  }

  state->covered_to = DatumGetTimestampTz(current_end.val);

  if (!state->target_end_unbounded && state->covered_to >= state->target_end) {
    state->completely_covered = true;
    state->finished = true;
  }

  PG_RETURN_POINTER(state);
}

Datum completely_covers_finalfn(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(completely_covers_finalfn);

Datum completely_covers_finalfn(PG_FUNCTION_ARGS)
{
  completely_covers_state *state;

  if (PG_ARGISNULL(0)) PG_RETURN_NULL();

  state = (completely_covers_state *)PG_GETARG_POINTER(0);
  if (state->answer_is_null) {
    PG_RETURN_NULL();
  } else {
    PG_RETURN_BOOL(state->completely_covered);
  }
}
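
For context, the extension's SQL script (`pgtemporal--0.0.1.sql`, referenced from the Makefile but not shown in this view) has to expose these two C functions as an aggregate. A minimal sketch of that wiring, with the exact declarations treated as assumptions, might look like:

-- Hypothetical aggregate wiring; the real pgtemporal--0.0.1.sql is not shown here.
CREATE FUNCTION completely_covers_transfn(internal, tstzrange, tstzrange)
RETURNS internal
AS 'pgtemporal', 'completely_covers_transfn'
LANGUAGE c;

CREATE FUNCTION completely_covers_finalfn(internal)
RETURNS boolean
AS 'pgtemporal', 'completely_covers_finalfn'
LANGUAGE c;

CREATE AGGREGATE completely_covers(tstzrange, tstzrange) (
  sfunc     = completely_covers_transfn,
  stype     = internal,
  finalfunc = completely_covers_finalfn
);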
Empty file added completely_covers.h
Empty file.
142 changes: 142 additions & 0 deletions expected/completely_covers_test.out
@@ -0,0 +1,142 @@
CREATE EXTENSION pgtemporal;
CREATE TABLE shifts (job_id INTEGER, worker_id INTEGER, valid_at tstzrange);
INSERT INTO shifts VALUES
(1, 1, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')),
(1, 2, tstzrange('2017-11-27 12:00:00', '2017-11-27 17:00:00')),
(2, 3, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00')),
(2, 4, tstzrange('2017-11-27 13:00:00', '2017-11-27 17:00:00'))
;
-- TRUE:
-- it covers when the range matches one exactly:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 12:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range matches two exactly:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra in front:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra behind:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- it covers when the range has extra on both sides:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
t
(1 row)

-- FALSE:
-- it does not cover when the range is null:
SELECT completely_covers(NULL, tstzrange('2017-11-27 08:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range misses completely:
SELECT completely_covers(valid_at, tstzrange('2017-11-29 08:00:00', '2017-11-29 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something at the beginning:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 04:00:00', '2017-11-27 14:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something at the end:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 20:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range has something in the middle:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 2;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is lower-unbounded:
SELECT completely_covers(valid_at, tstzrange(NULL, '2017-11-27 17:00:00'))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is upper-unbounded:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', NULL))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- it does not cover when the range is both-sides-unbounded:
SELECT completely_covers(valid_at, tstzrange(NULL, NULL))
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------
f
(1 row)

-- NULL:
-- it is unknown when the target is null:
SELECT completely_covers(valid_at, null)
FROM shifts
WHERE job_id = 1;
completely_covers
-------------------

(1 row)

-- Errors:
-- it fails if the input ranges go backwards:
SELECT completely_covers(valid_at, tstzrange('2017-11-27 06:00:00', '2017-11-27 17:00:00') ORDER BY worker_id DESC)
FROM shifts
WHERE job_id = 1;
