Skip to content

Commit

Permalink
Make more simple incremental update (#944)
Browse files Browse the repository at this point in the history
Replace the update on the whole table with an update on changed rows only.

The goal is to update more quickly by updating only the content that changed.
The update now focuses on the osm_id of changed rows, and it uses the index. A WHERE clause `tags != update_tags(tags, geometry)` is added to ensure that rows are only updated when their tags actually change.

It requires one more trigger and a table to store changed osm_id.

The UPDATE is kept in a function so that it can be reused for both the initial setup and the trigger-driven update.

I tried many code layouts before settling on this one, with the goal of keeping the same code for the initial pass and for updates. It should have a low impact on the initial data load. Better performance for row updates could be achieved with BEFORE UPDATE, but that would require duplicating the logic.

It is not based on the already merged #896 because calling an update within a function for each updated row was not efficient for larger tables (like housenumber).

It addresses #814.


* Remake update_peak_point to use incremental update #814

* Make update_aerodrome_label_point use incremental update #814

* Make housenumber_centroid use incremental update #814

* Make update_continent_point use incremental update #814

* Make update_island_point use incremental update #814

* Make update_island_polygon use incremental update #814

* Remove dead code in update_state_point.sql

* Make update_state_point use incremental update #814

* Remove dead code in update_country_point.sql

* Make update_country_point use incremental update #814

* Make osm_poi_polygon use incremental update #814

Thanks @frodrigo
  • Loading branch information
frodrigo committed Aug 28, 2020
1 parent 10efc29 commit bb2a432
Show file tree
Hide file tree
Showing 9 changed files with 366 additions and 103 deletions.
45 changes: 36 additions & 9 deletions layers/aerodrome_label/update_aerodrome_label_point.sql
Original file line number Diff line number Diff line change
@@ -1,25 +1,44 @@
DROP TRIGGER IF EXISTS trigger_flag ON osm_aerodrome_label_point;
DROP TRIGGER IF EXISTS trigger_store ON osm_aerodrome_label_point;
DROP TRIGGER IF EXISTS trigger_refresh ON aerodrome_label.updates;

CREATE SCHEMA IF NOT EXISTS aerodrome_label;

-- Queue of osm_ids of rows changed since the last refresh: filled by
-- aerodrome_label.store(), read by update_aerodrome_label_point(false) and
-- emptied by aerodrome_label.refresh().
CREATE TABLE IF NOT EXISTS aerodrome_label.osm_ids
(
osm_id bigint
);

-- etldoc: osm_aerodrome_label_point -> osm_aerodrome_label_point
CREATE OR REPLACE FUNCTION update_aerodrome_label_point() RETURNS void AS
CREATE OR REPLACE FUNCTION update_aerodrome_label_point(full_update boolean) RETURNS void AS
$$
BEGIN
UPDATE osm_aerodrome_label_point
SET geometry = ST_Centroid(geometry)
WHERE ST_GeometryType(geometry) <> 'ST_Point';
WHERE (full_update OR osm_id IN (SELECT osm_id FROM aerodrome_label.osm_ids))
AND ST_GeometryType(geometry) <> 'ST_Point';

UPDATE osm_aerodrome_label_point
SET tags = update_tags(tags, geometry)
WHERE COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL;
END;
$$ LANGUAGE plpgsql;
-- Restrict to the queued osm_ids unless a full update is requested, and to rows
-- that have no latin / nonlatin / international name tag yet.
WHERE (full_update OR osm_id IN (SELECT osm_id FROM aerodrome_label.osm_ids))
AND COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL
-- Only write rows whose tags would actually change. Comparing with "=" here
-- would select exactly the rows that need no update and skip all the others.
AND tags != update_tags(tags, geometry);
$$ LANGUAGE SQL;

SELECT update_aerodrome_label_point();
SELECT update_aerodrome_label_point(true);

-- Handle updates

CREATE SCHEMA IF NOT EXISTS aerodrome_label;
-- Trigger function: appends the osm_id of the affected row to
-- aerodrome_label.osm_ids so a later refresh can limit its work to those rows.
CREATE OR REPLACE FUNCTION aerodrome_label.store() RETURNS trigger AS
$$
BEGIN
    -- Inserted and updated rows are read from NEW; a deleted row only exists as OLD.
    IF (tg_op <> 'DELETE') THEN
        INSERT INTO aerodrome_label.osm_ids VALUES (NEW.osm_id);
    ELSE
        INSERT INTO aerodrome_label.osm_ids VALUES (OLD.osm_id);
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TABLE IF NOT EXISTS aerodrome_label.updates
(
Expand All @@ -39,13 +58,21 @@ CREATE OR REPLACE FUNCTION aerodrome_label.refresh() RETURNS trigger AS
$$
BEGIN
RAISE LOG 'Refresh aerodrome_label';
PERFORM update_aerodrome_label_point();
PERFORM update_aerodrome_label_point(false);
-- noinspection SqlWithoutWhere
DELETE FROM aerodrome_label.osm_ids;
-- noinspection SqlWithoutWhere
DELETE FROM aerodrome_label.updates;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- For every inserted, updated or deleted row of osm_aerodrome_label_point,
-- call aerodrome_label.store() to record the row's osm_id.
CREATE TRIGGER trigger_store
AFTER INSERT OR UPDATE OR DELETE
ON osm_aerodrome_label_point
FOR EACH ROW
EXECUTE PROCEDURE aerodrome_label.store();

CREATE TRIGGER trigger_flag
AFTER INSERT OR UPDATE OR DELETE
ON osm_aerodrome_label_point
Expand Down
41 changes: 33 additions & 8 deletions layers/housenumber/housenumber_centroid.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,43 @@
DROP TRIGGER IF EXISTS trigger_flag ON osm_housenumber_point;
DROP TRIGGER IF EXISTS trigger_store ON osm_housenumber_point;
DROP TRIGGER IF EXISTS trigger_refresh ON housenumber.updates;

CREATE SCHEMA IF NOT EXISTS housenumber;

-- Queue of osm_ids of rows changed since the last refresh: filled by
-- housenumber.store(), read by convert_housenumber_point(false) and
-- emptied by housenumber.refresh().
CREATE TABLE IF NOT EXISTS housenumber.osm_ids
(
osm_id bigint
);

-- etldoc: osm_housenumber_point -> osm_housenumber_point
CREATE OR REPLACE FUNCTION convert_housenumber_point() RETURNS void AS
CREATE OR REPLACE FUNCTION convert_housenumber_point(full_update boolean) RETURNS void AS
$$
BEGIN
UPDATE osm_housenumber_point
SET geometry =
CASE
WHEN ST_NPoints(ST_ConvexHull(geometry)) = ST_NPoints(geometry)
THEN ST_Centroid(geometry)
ELSE ST_PointOnSurface(geometry)
END
WHERE ST_GeometryType(geometry) <> 'ST_Point';
END;
$$ LANGUAGE plpgsql;
WHERE (full_update OR osm_id IN (SELECT osm_id FROM housenumber.osm_ids))
AND ST_GeometryType(geometry) <> 'ST_Point';
$$ LANGUAGE SQL;

SELECT convert_housenumber_point();
SELECT convert_housenumber_point(true);

-- Handle updates

CREATE SCHEMA IF NOT EXISTS housenumber;
-- Trigger function: appends the osm_id of the affected row to
-- housenumber.osm_ids so a later refresh can limit its work to those rows.
CREATE OR REPLACE FUNCTION housenumber.store() RETURNS trigger AS
$$
BEGIN
    -- Inserted and updated rows are read from NEW; a deleted row only exists as OLD.
    IF (tg_op <> 'DELETE') THEN
        INSERT INTO housenumber.osm_ids VALUES (NEW.osm_id);
    ELSE
        INSERT INTO housenumber.osm_ids VALUES (OLD.osm_id);
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TABLE IF NOT EXISTS housenumber.updates
(
Expand All @@ -40,13 +57,21 @@ CREATE OR REPLACE FUNCTION housenumber.refresh() RETURNS trigger AS
$$
BEGIN
RAISE LOG 'Refresh housenumber';
PERFORM convert_housenumber_point();
PERFORM convert_housenumber_point(false);
-- noinspection SqlWithoutWhere
DELETE FROM housenumber.osm_ids;
-- noinspection SqlWithoutWhere
DELETE FROM housenumber.updates;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- For every inserted, updated or deleted row of osm_housenumber_point,
-- call housenumber.store() to record the row's osm_id.
CREATE TRIGGER trigger_store
AFTER INSERT OR UPDATE OR DELETE
ON osm_housenumber_point
FOR EACH ROW
EXECUTE PROCEDURE housenumber.store();

CREATE TRIGGER trigger_flag
AFTER INSERT OR UPDATE OR DELETE
ON osm_housenumber_point
Expand Down
78 changes: 64 additions & 14 deletions layers/mountain_peak/update_peak_point.sql
Original file line number Diff line number Diff line change
@@ -1,32 +1,82 @@
DROP TRIGGER IF EXISTS trigger_update_point ON osm_peak_point;
DROP TRIGGER IF EXISTS trigger_flag ON osm_peak_point;
DROP TRIGGER IF EXISTS trigger_store ON osm_peak_point;
DROP TRIGGER IF EXISTS trigger_refresh ON mountain_peak_point.updates;

CREATE SCHEMA IF NOT EXISTS mountain_peak_point;

-- Queue of osm_ids of rows changed since the last refresh: filled by
-- mountain_peak_point.store(), read by update_osm_peak_point(false) and
-- emptied by mountain_peak_point.refresh().
CREATE TABLE IF NOT EXISTS mountain_peak_point.osm_ids
(
osm_id bigint
);

-- etldoc: osm_peak_point -> osm_peak_point
CREATE OR REPLACE FUNCTION update_osm_peak_point(new_osm_id bigint) RETURNS void AS
CREATE OR REPLACE FUNCTION update_osm_peak_point(full_update boolean) RETURNS void AS
$$
UPDATE osm_peak_point
SET tags = update_tags(tags, geometry)
WHERE (new_osm_id IS NULL OR osm_id = new_osm_id)
AND COALESCE(tags -> 'name:latin', tags -> 'name:nonlatin', tags -> 'name_int') IS NULL
AND tags != update_tags(tags, geometry)
UPDATE osm_peak_point
SET tags = update_tags(tags, geometry)
WHERE (full_update OR osm_id IN (SELECT osm_id FROM mountain_peak_point.osm_ids))
AND COALESCE(tags -> 'name:latin', tags -> 'name:nonlatin', tags -> 'name_int') IS NULL
AND tags != update_tags(tags, geometry)
$$ LANGUAGE SQL;

SELECT update_osm_peak_point(NULL);
SELECT update_osm_peak_point(true);

-- Handle updates

CREATE SCHEMA IF NOT EXISTS mountain_peak_point;
-- Trigger function: appends the osm_id of the affected row to
-- mountain_peak_point.osm_ids so a later refresh can limit its work to those rows.
CREATE OR REPLACE FUNCTION mountain_peak_point.store() RETURNS trigger AS
$$
BEGIN
    -- Inserted and updated rows are read from NEW; a deleted row only exists as OLD.
    IF (tg_op <> 'DELETE') THEN
        INSERT INTO mountain_peak_point.osm_ids VALUES (NEW.osm_id);
    ELSE
        INSERT INTO mountain_peak_point.osm_ids VALUES (OLD.osm_id);
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION mountain_peak_point.update() RETURNS trigger AS
CREATE TABLE IF NOT EXISTS mountain_peak_point.updates
(
id serial PRIMARY KEY,
t text,
UNIQUE (t)
);
CREATE OR REPLACE FUNCTION mountain_peak_point.flag() RETURNS trigger AS
$$
BEGIN
PERFORM update_osm_peak_point(new.osm_id);
INSERT INTO mountain_peak_point.updates(t) VALUES ('y') ON CONFLICT(t) DO NOTHING;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE CONSTRAINT TRIGGER trigger_update_point
AFTER INSERT OR UPDATE
-- Trigger function that applies the pending incremental update: it re-runs
-- update_osm_peak_point for the queued osm_ids only (full_update = false),
-- then empties both the osm_id queue and the updates flag table.
CREATE OR REPLACE FUNCTION mountain_peak_point.refresh() RETURNS trigger AS
$$
BEGIN
RAISE LOG 'Refresh mountain_peak_point';
-- Must run before the queue is emptied below: with full_update = false the
-- update only considers osm_ids found in mountain_peak_point.osm_ids.
PERFORM update_osm_peak_point(false);
-- noinspection SqlWithoutWhere
DELETE FROM mountain_peak_point.osm_ids;
-- noinspection SqlWithoutWhere
DELETE FROM mountain_peak_point.updates;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- For every inserted, updated or deleted row of osm_peak_point,
-- call mountain_peak_point.store() to record the row's osm_id.
CREATE TRIGGER trigger_store
AFTER INSERT OR UPDATE OR DELETE
ON osm_peak_point
FOR EACH ROW
EXECUTE PROCEDURE mountain_peak_point.store();

-- Once per modifying statement on osm_peak_point, call
-- mountain_peak_point.flag(), which inserts a marker row into
-- mountain_peak_point.updates.
CREATE TRIGGER trigger_flag
AFTER INSERT OR UPDATE OR DELETE
ON osm_peak_point
FOR EACH STATEMENT
EXECUTE PROCEDURE mountain_peak_point.flag();

CREATE CONSTRAINT TRIGGER trigger_refresh
AFTER INSERT
ON mountain_peak_point.updates
INITIALLY DEFERRED
FOR EACH ROW
EXECUTE PROCEDURE mountain_peak_point.update();
EXECUTE PROCEDURE mountain_peak_point.refresh();
43 changes: 34 additions & 9 deletions layers/place/update_continent_point.sql
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
DROP TRIGGER IF EXISTS trigger_flag ON osm_continent_point;
DROP TRIGGER IF EXISTS trigger_store ON osm_continent_point;
DROP TRIGGER IF EXISTS trigger_refresh ON place_continent_point.updates;

CREATE SCHEMA IF NOT EXISTS place_continent_point;

-- Queue of osm_ids of rows changed since the last refresh: filled by
-- place_continent_point.store(), read by update_osm_continent_point(false) and
-- emptied by place_continent_point.refresh().
CREATE TABLE IF NOT EXISTS place_continent_point.osm_ids
(
osm_id bigint
);

-- etldoc: osm_continent_point -> osm_continent_point
CREATE OR REPLACE FUNCTION update_osm_continent_point() RETURNS void AS
CREATE OR REPLACE FUNCTION update_osm_continent_point(full_update boolean) RETURNS void AS
$$
BEGIN
UPDATE osm_continent_point
SET tags = update_tags(tags, geometry)
WHERE COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL;
WHERE (full_update OR osm_id IN (SELECT osm_id FROM place_continent_point.osm_ids))
AND COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL
AND tags != update_tags(tags, geometry);
$$ LANGUAGE SQL;

END;
$$ LANGUAGE plpgsql;

SELECT update_osm_continent_point();
SELECT update_osm_continent_point(true);

-- Handle updates

CREATE SCHEMA IF NOT EXISTS place_continent_point;
-- Trigger function: appends the osm_id of the affected row to
-- place_continent_point.osm_ids so a later refresh can limit its work to those rows.
CREATE OR REPLACE FUNCTION place_continent_point.store() RETURNS trigger AS
$$
BEGIN
    -- Inserted and updated rows are read from NEW; a deleted row only exists as OLD.
    IF (tg_op <> 'DELETE') THEN
        INSERT INTO place_continent_point.osm_ids VALUES (NEW.osm_id);
    ELSE
        INSERT INTO place_continent_point.osm_ids VALUES (OLD.osm_id);
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TABLE IF NOT EXISTS place_continent_point.updates
(
Expand All @@ -36,13 +53,21 @@ CREATE OR REPLACE FUNCTION place_continent_point.refresh() RETURNS trigger AS
$$
BEGIN
RAISE LOG 'Refresh place_continent_point';
PERFORM update_osm_continent_point();
PERFORM update_osm_continent_point(false);
-- noinspection SqlWithoutWhere
DELETE FROM place_continent_point.osm_ids;
-- noinspection SqlWithoutWhere
DELETE FROM place_continent_point.updates;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- For every inserted, updated or deleted row of osm_continent_point,
-- call place_continent_point.store() to record the row's osm_id.
CREATE TRIGGER trigger_store
AFTER INSERT OR UPDATE OR DELETE
ON osm_continent_point
FOR EACH ROW
EXECUTE PROCEDURE place_continent_point.store();

CREATE TRIGGER trigger_flag
AFTER INSERT OR UPDATE OR DELETE
ON osm_continent_point
Expand Down
Loading

0 comments on commit bb2a432

Please sign in to comment.