diff --git a/db/deploy/001.sql b/db/deploy/001.sql index e640f60..2fc3612 100644 --- a/db/deploy/001.sql +++ b/db/deploy/001.sql @@ -2,10 +2,10 @@ BEGIN; -CREATE SCHEMA pgosm_flex_faker; +CREATE SCHEMA geo_faker; -CREATE PROCEDURE pgosm_flex_faker.point_in_place_landuse() +CREATE PROCEDURE geo_faker.point_in_place_landuse() LANGUAGE plpgsql AS $$ BEGIN @@ -115,7 +115,6 @@ BEGIN WHERE p.near_areas > 0 ; - DROP TABLE IF EXISTS selected; CREATE TEMP TABLE selected AS WITH a AS ( @@ -133,7 +132,6 @@ BEGIN WHERE a.total_score > a.rnd ; - -- Selected areas to put points into. DROP TABLE IF EXISTS faker_place_polygon; CREATE TEMP TABLE faker_place_polygon AS @@ -147,7 +145,6 @@ BEGIN ON faker_place_polygon USING GIST (geom) ; - /* Ranking roads by osm_type with goal of scoring roads with lower speed limits higher. Uses helper table loaded by PgOSM Flex. @@ -182,7 +179,6 @@ BEGIN FROM normal_rnk ; - /* Identify roads where a building could be Not using actual buildings / addresses because: @@ -239,12 +235,13 @@ END $$ ; - +COMMENT ON PROCEDURE geo_faker.point_in_place_landuse IS 'Uses osm.landuse_polygon and osm.road_line to simulate probable locations for commercial store locations. Can be customized for custom landuse types by manually defining landuse_osm_types temp table.' +; -- From: https://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint -CREATE FUNCTION pgosm_flex_faker.n_points_in_polygon(geom geometry, num_points integer) +CREATE FUNCTION geo_faker.n_points_in_polygon(geom geometry, num_points integer) RETURNS SETOF geometry LANGUAGE plpgsql VOLATILE COST 100 @@ -291,8 +288,96 @@ END $$ ; +COMMENT ON FUNCTION geo_faker.n_points_in_polygon(GEOMETRY, INT) IS 'Creates N points randomly within the given polygon.'; + + + +-- Call the procedure to ensure the required temp table exists, avoids deploy failure +CALL geo_faker.point_in_place_landuse(); + + +CREATE PROCEDURE geo_faker.points_around_point() + LANGUAGE plpgsql + AS $$ + DECLARE + stores_to_process BIGINT; + t_row faker_store_location%rowtype; + BEGIN + + SELECT COUNT(*) INTO stores_to_process + FROM faker_store_location + ; + RAISE NOTICE 'Stores to process: %', stores_to_process; + +DROP TABLE IF EXISTS faker_customer_location; +CREATE TEMP TABLE faker_customer_location +( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, + store_id BIGINT NOT NULL, + customer_id BIGINT NOT NULL, + geom GEOMETRY(POINT, 3857) NOT NULL +); + FOR t_row IN SELECT * FROM faker_store_location LOOP + RAISE NOTICE 'Store ID: %', t_row.id; + DROP TABLE IF EXISTS place_buffer; + CREATE TEMP TABLE place_buffer AS + SELECT id AS store_id, geom, ST_Buffer(geom, 5000) AS geom_buffer + FROM faker_store_location + WHERE id = t_row.id + ; + + DROP TABLE IF EXISTS store_potential_customers; + CREATE TEMP TABLE store_potential_customers AS + SELECT store_id, + geo_faker.n_points_in_polygon(geom_buffer, 1000) + AS geom + FROM place_buffer + ; + ALTER TABLE store_potential_customers + ADD customer_id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY; + + --SELECT * FROM store_potential_customers; + /* + * Using a CTE here with ST_Envelope to bbox join roads. + * A simple join (which looks innocent) took 45+ seconds to return 141 rows + * while the CTE version takes < 60 ms. + */ + --EXPLAIN (ANALYZE, BUFFERS, VERBOSE, SETTINGS) + WITH possible_roads AS ( + SELECT p.store_id, p.customer_id, p.geom AS geom_customer, + r.geom AS geom_road, + ST_Distance(p.geom, r.geom) AS distance + FROM osm.road_line r + INNER JOIN store_potential_customers p + ON ST_DWithin(r.geom, p.geom, 300) + WHERE r.route_motor + ), ranked AS ( + SELECT *, ROW_NUMBER() OVER ( + PARTITION BY store_id, customer_id ORDER BY distance + ) AS rnk + FROM possible_roads + ) + INSERT INTO faker_customer_location (store_id, customer_id, geom) + SELECT store_id, customer_id, + ST_Snap(geom_customer, geom_road, 300) AS geom_snapped + FROM ranked + WHERE rnk = 1 + ; + COMMIT; + + END LOOP; + +END; +$$; + + +COMMENT ON PROCEDURE geo_faker.points_around_point IS 'Creates fake customer locations around a store. Locations are snapped to roads. Locations not scoped to landuse at this time. Requires faker_store_location temp table with fake store data.'; COMMIT; + + + + diff --git a/db/revert/001.sql b/db/revert/001.sql index 42459da..e8aabe0 100644 --- a/db/revert/001.sql +++ b/db/revert/001.sql @@ -2,6 +2,6 @@ BEGIN; -DROP SCHEMA pgosm_flex_faker CASCADE; +DROP SCHEMA geo_faker CASCADE; COMMIT; diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 1eb6ba9..9720ece 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -1,6 +1,6 @@ # Summary -- [What is PgOSM Flex Faker?](pgosm-flex-faker.md) +- [What is Geo Faker?](geo-faker.md) - [Quick Start](quick-start.md) - [Customize](customize.md) - [Docker image](docker-image.md) diff --git a/docs/src/customize.md b/docs/src/customize.md index 1af7e2d..c17cc33 100644 --- a/docs/src/customize.md +++ b/docs/src/customize.md @@ -2,6 +2,8 @@ This section builds on the [Quick Start](quick-start.md) section. +> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0). + ## External Postgres connections @@ -11,13 +13,22 @@ This approach does load a lot of data to the target database which may not be desired. Consider using `pg_dump` to load only the target data to your database of choice. +The Sqitch deployment step should use additional parameters not set in the quick start +instructions. + +```bash +source ~/.pgosm-faker-local +cd pgosm-flex-faker/db +sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB deploy +``` + ## Each time is new data Rerun, save second set. ```sql -CALL pgosm_flex_faker.point_in_place_landuse(); +CALL geo_faker.point_in_place_landuse(); CREATE TABLE my_fake_stores_v2 AS SELECT * FROM faker_store_location @@ -26,7 +37,7 @@ SELECT * ## Custom Places for Shops -The procedure `pgosm_flex_faker.point_in_place_landuse()` allows overriding +The procedure `geo_faker.point_in_place_landuse()` allows overriding the inclusion of `retail` and `commercial` landuse. This is done by creating a custom `landuse_osm_types` table before running the stored procedure. diff --git a/docs/src/docker-image.md b/docs/src/docker-image.md index a0bc772..78401e5 100644 --- a/docs/src/docker-image.md +++ b/docs/src/docker-image.md @@ -1,11 +1,17 @@ # Docker image +> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0). + ## Building the image Build latest. Occasionally run with `--no-cache` to force some software updates. ```bash docker pull rustprooflabs/pgosm-flex:latest -docker build -t rustprooflabs/pgosm-flex-faker:latest . +docker build -t rustprooflabs/geo-faker:latest . ``` + +```bash +docker push rustprooflabs/geo-faker:latest +``` diff --git a/docs/src/geo-faker.md b/docs/src/geo-faker.md new file mode 100644 index 0000000..59d6b07 --- /dev/null +++ b/docs/src/geo-faker.md @@ -0,0 +1,27 @@ +# What is Geo Faker? + +The the Geo Faker project creates fake store and customer data with geospatial +components based on OpenStreetMap. The use of OpenStreetMap data +as a starting point provides a sense of realism. The use of `random()` +and to generate fake data avoids privacy concerns. + + +> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0). + + +GeoFaker builds on [PgOSM Flex](https://pgosm-flex.com/), using its inherit ability +to be customized as a launching point to generate realistic looking, but absolutely fake +geospatial data. + + + +Version 1 + +![alt](osm-faker-stores-in-ohio-1.png) + +Version 2 + +![alt2](osm-faker-stores-in-ohio-2.png) + + + diff --git a/docs/src/pgosm-flex-faker.md b/docs/src/pgosm-flex-faker.md deleted file mode 100644 index dac5210..0000000 --- a/docs/src/pgosm-flex-faker.md +++ /dev/null @@ -1,17 +0,0 @@ -# What is PgOSM Flex Faker? - -The project creates fake store and customer data with geospatial -components based on OpenStreetMap. The use of OpenStreetMap data -as a starting point provides a sense of realism. The use of `random()` -and to generate fake data avoids privacy concerns. - -Version 1 - -![alt](osm-faker-stores-in-ohio-1.png) - -Version 2 - -![alt2](osm-faker-stores-in-ohio-2.png) - - - diff --git a/docs/src/quick-start.md b/docs/src/quick-start.md index 73f13a4..0cf479b 100644 --- a/docs/src/quick-start.md +++ b/docs/src/quick-start.md @@ -1,13 +1,23 @@ -# Quick Start to PgOSM Flex Faker +# Quick Start to Geo Faker -This section covers how to get started with the Faker version of PgOSM Flex. +This section covers how to get started with the Faker version of PgOSM Flex, +also known as Geo Faker. +> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0). + +The basic process to using Geo Faker are: + +* Run PgOSM Flex with custom layerset +* Load PgOSM Flex Faker objects +* + ## Load OpenStreetMap Data Load the region/subregion you want using the PgOSM Flex Docker image. -These instructions are modified from [PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html) section. The following -loads the data into a PostGIS enabled database in a `pgosm-flex-faker` +These instructions are modified from +[PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html) +section. The following loads the data into a PostGIS enabled database in a `geo-faker` Docker container available on port 5433. @@ -16,14 +26,14 @@ mkdir ~/pgosm-data export POSTGRES_USER=postgres export POSTGRES_PASSWORD=mysecretpassword -docker pull rustprooflabs/pgosm-flex-faker:latest +docker pull rustprooflabs/geo-faker:latest docker run --name pgosm-faker -d --rm \ -v ~/pgosm-data:/app/output \ - -v ~/git/pgosm-flex-faker/:/custom-layerset \ + -v ~/git/geo-faker/:/custom-layerset \ -v /etc/localtime:/etc/localtime:ro \ -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ - -p 5433:5432 -d rustprooflabs/pgosm-flex-faker + -p 5433:5432 -d rustprooflabs/geo-faker docker exec -it \ pgosm-faker python3 docker/pgosm_flex.py \ @@ -37,12 +47,13 @@ docker exec -it \ ## Load Faker Objects -After the data completes processing, load the PgOSM Flex Faker database structures. +After the data completes processing, load the PgOSM Flex Faker database structures +in the `geo_faker` schema. This is done using Sqitch. ```bash -cd pgosm-flex-faker/db +cd geo-faker/db sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@localhost:5433/pgosm deploy ``` @@ -52,7 +63,11 @@ random results. ## Run Faker generation -The stored procedure `pgosm_flex_faker.point_in_place_landuse()` places points +There are two stored procedures in the `geo_faker` schema that +generate the fake stores and customers. + + +The stored procedure `geo_faker.point_in_place_landuse()` places points along roads that are within (or nearby) specific `landuse` areas. The generated data is available after calling the stored procedure in a temporary table named `faker_store_location`. @@ -60,20 +75,35 @@ The generated data is scoped to named places currently, though that will likely become adjustable in the future. +The `geo_faker.point_in_place_landuse()` stored procedure requires +the `faker_store_location` temp table created by the first stored procedure. + + + ```sql -CALL pgosm_flex_faker.point_in_place_landuse(); -SELECT COUNT(*) FROM faker_store_location; +CALL geo_faker.point_in_place_landuse(); +CALL geo_faker.points_around_point(); ``` - The following query saves the data in a new, non-temporary table named `my_fake_stores`. + + + ```sql CREATE TABLE my_fake_stores AS SELECT * FROM faker_store_location ; + +CREATE TABLE my_fake_customers AS +SELECT * + FROM faker_customer_location +; ``` + + +