Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 93 additions & 8 deletions db/deploy/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

BEGIN;

CREATE SCHEMA pgosm_flex_faker;
CREATE SCHEMA geo_faker;


CREATE PROCEDURE pgosm_flex_faker.point_in_place_landuse()
CREATE PROCEDURE geo_faker.point_in_place_landuse()
LANGUAGE plpgsql
AS $$
BEGIN
Expand Down Expand Up @@ -115,7 +115,6 @@ BEGIN
WHERE p.near_areas > 0
;


DROP TABLE IF EXISTS selected;
CREATE TEMP TABLE selected AS
WITH a AS (
Expand All @@ -133,7 +132,6 @@ BEGIN
WHERE a.total_score > a.rnd
;


-- Selected areas to put points into.
DROP TABLE IF EXISTS faker_place_polygon;
CREATE TEMP TABLE faker_place_polygon AS
Expand All @@ -147,7 +145,6 @@ BEGIN
ON faker_place_polygon USING GIST (geom)
;


/*
Ranking roads by osm_type with goal of scoring roads with lower speed
limits higher. Uses helper table loaded by PgOSM Flex.
Expand Down Expand Up @@ -182,7 +179,6 @@ BEGIN
FROM normal_rnk
;


/*
Identify roads where a building could be
Not using actual buildings / addresses because:
Expand Down Expand Up @@ -239,12 +235,13 @@ END
$$
;


COMMENT ON PROCEDURE geo_faker.point_in_place_landuse IS 'Uses osm.landuse_polygon and osm.road_line to simulate probable locations for commercial store locations. Can be customized for custom landuse types by manually defining landuse_osm_types temp table.'
;


-- From: https://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint

CREATE FUNCTION pgosm_flex_faker.n_points_in_polygon(geom geometry, num_points integer)
CREATE FUNCTION geo_faker.n_points_in_polygon(geom geometry, num_points integer)
RETURNS SETOF geometry
LANGUAGE plpgsql VOLATILE
COST 100
Expand Down Expand Up @@ -291,8 +288,96 @@ END
$$
;

COMMENT ON FUNCTION geo_faker.n_points_in_polygon(GEOMETRY, INT) IS 'Creates N points randomly within the given polygon.';



-- Call the procedure to ensure the required temp table exists, avoids deploy failure
CALL geo_faker.point_in_place_landuse();


CREATE PROCEDURE geo_faker.points_around_point()
LANGUAGE plpgsql
AS $$
DECLARE
stores_to_process BIGINT;
t_row faker_store_location%rowtype;
BEGIN

SELECT COUNT(*) INTO stores_to_process
FROM faker_store_location
;
RAISE NOTICE 'Stores to process: %', stores_to_process;

DROP TABLE IF EXISTS faker_customer_location;
CREATE TEMP TABLE faker_customer_location
(
id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
store_id BIGINT NOT NULL,
customer_id BIGINT NOT NULL,
geom GEOMETRY(POINT, 3857) NOT NULL
);


FOR t_row IN SELECT * FROM faker_store_location LOOP
RAISE NOTICE 'Store ID: %', t_row.id;

DROP TABLE IF EXISTS place_buffer;
CREATE TEMP TABLE place_buffer AS
SELECT id AS store_id, geom, ST_Buffer(geom, 5000) AS geom_buffer
FROM faker_store_location
WHERE id = t_row.id
;

DROP TABLE IF EXISTS store_potential_customers;
CREATE TEMP TABLE store_potential_customers AS
SELECT store_id,
geo_faker.n_points_in_polygon(geom_buffer, 1000)
AS geom
FROM place_buffer
;
ALTER TABLE store_potential_customers
ADD customer_id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY;

--SELECT * FROM store_potential_customers;
/*
* Using a CTE here with ST_Envelope to bbox join roads.
* A simple join (which looks innocent) took 45+ seconds to return 141 rows
* while the CTE version takes < 60 ms.
*/
--EXPLAIN (ANALYZE, BUFFERS, VERBOSE, SETTINGS)
WITH possible_roads AS (
SELECT p.store_id, p.customer_id, p.geom AS geom_customer,
r.geom AS geom_road,
ST_Distance(p.geom, r.geom) AS distance
FROM osm.road_line r
INNER JOIN store_potential_customers p
ON ST_DWithin(r.geom, p.geom, 300)
WHERE r.route_motor
), ranked AS (
SELECT *, ROW_NUMBER() OVER (
PARTITION BY store_id, customer_id ORDER BY distance
) AS rnk
FROM possible_roads
)
INSERT INTO faker_customer_location (store_id, customer_id, geom)
SELECT store_id, customer_id,
ST_Snap(geom_customer, geom_road, 300) AS geom_snapped
FROM ranked
WHERE rnk = 1
;
COMMIT;

END LOOP;

END;
$$;


COMMENT ON PROCEDURE geo_faker.points_around_point IS 'Creates fake customer locations around a store. Locations are snapped to roads. Locations not scoped to landuse at this time. Requires faker_store_location temp table with fake store data.';

COMMIT;




2 changes: 1 addition & 1 deletion db/revert/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

BEGIN;

DROP SCHEMA pgosm_flex_faker CASCADE;
DROP SCHEMA geo_faker CASCADE;

COMMIT;
2 changes: 1 addition & 1 deletion docs/src/SUMMARY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Summary

- [What is PgOSM Flex Faker?](pgosm-flex-faker.md)
- [What is Geo Faker?](geo-faker.md)
- [Quick Start](quick-start.md)
- [Customize](customize.md)
- [Docker image](docker-image.md)
15 changes: 13 additions & 2 deletions docs/src/customize.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

This section builds on the [Quick Start](quick-start.md) section.

> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).


## External Postgres connections

Expand All @@ -11,13 +13,22 @@ This approach does load a lot of data to the target database which may not be
desired. Consider using `pg_dump` to load only the target data to your
database of choice.

The Sqitch deployment step should use additional parameters not set in the quick start
instructions.

```bash
source ~/.pgosm-faker-local
cd pgosm-flex-faker/db
sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB deploy
```


## Each time is new data

Rerun, save second set.

```sql
CALL pgosm_flex_faker.point_in_place_landuse();
CALL geo_faker.point_in_place_landuse();
CREATE TABLE my_fake_stores_v2 AS
SELECT *
FROM faker_store_location
Expand All @@ -26,7 +37,7 @@ SELECT *

## Custom Places for Shops

The procedure `pgosm_flex_faker.point_in_place_landuse()` allows overriding
The procedure `geo_faker.point_in_place_landuse()` allows overriding
the inclusion of `retail` and `commercial` landuse.
This is done by creating a custom `landuse_osm_types` table before
running the stored procedure.
Expand Down
8 changes: 7 additions & 1 deletion docs/src/docker-image.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# Docker image

> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).

## Building the image

Build latest. Occasionally run with `--no-cache` to force some software updates.

```bash
docker pull rustprooflabs/pgosm-flex:latest
docker build -t rustprooflabs/pgosm-flex-faker:latest .
docker build -t rustprooflabs/geo-faker:latest .
```


```bash
docker push rustprooflabs/geo-faker:latest
```
27 changes: 27 additions & 0 deletions docs/src/geo-faker.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# What is Geo Faker?

The the Geo Faker project creates fake store and customer data with geospatial
components based on OpenStreetMap. The use of OpenStreetMap data
as a starting point provides a sense of realism. The use of `random()`
and to generate fake data avoids privacy concerns.


> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).


GeoFaker builds on [PgOSM Flex](https://pgosm-flex.com/), using its inherit ability
to be customized as a launching point to generate realistic looking, but absolutely fake
geospatial data.



Version 1

![alt](osm-faker-stores-in-ohio-1.png)

Version 2

![alt2](osm-faker-stores-in-ohio-2.png)



17 changes: 0 additions & 17 deletions docs/src/pgosm-flex-faker.md

This file was deleted.

56 changes: 43 additions & 13 deletions docs/src/quick-start.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
# Quick Start to PgOSM Flex Faker
# Quick Start to Geo Faker

This section covers how to get started with the Faker version of PgOSM Flex.
This section covers how to get started with the Faker version of PgOSM Flex,
also known as Geo Faker.


> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).

The basic process to using Geo Faker are:

* Run PgOSM Flex with custom layerset
* Load PgOSM Flex Faker objects
*

## Load OpenStreetMap Data

Load the region/subregion you want using the PgOSM Flex Docker image.
These instructions are modified from [PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html) section. The following
loads the data into a PostGIS enabled database in a `pgosm-flex-faker`
These instructions are modified from
[PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html)
section. The following loads the data into a PostGIS enabled database in a `geo-faker`
Docker container available on port 5433.


Expand All @@ -16,14 +26,14 @@ mkdir ~/pgosm-data
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=mysecretpassword

docker pull rustprooflabs/pgosm-flex-faker:latest
docker pull rustprooflabs/geo-faker:latest

docker run --name pgosm-faker -d --rm \
-v ~/pgosm-data:/app/output \
-v ~/git/pgosm-flex-faker/:/custom-layerset \
-v ~/git/geo-faker/:/custom-layerset \
-v /etc/localtime:/etc/localtime:ro \
-e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \
-p 5433:5432 -d rustprooflabs/pgosm-flex-faker
-p 5433:5432 -d rustprooflabs/geo-faker

docker exec -it \
pgosm-faker python3 docker/pgosm_flex.py \
Expand All @@ -37,12 +47,13 @@ docker exec -it \

## Load Faker Objects

After the data completes processing, load the PgOSM Flex Faker database structures.
After the data completes processing, load the PgOSM Flex Faker database structures
in the `geo_faker` schema.
This is done using Sqitch.


```bash
cd pgosm-flex-faker/db
cd geo-faker/db
sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@localhost:5433/pgosm deploy
```

Expand All @@ -52,28 +63,47 @@ random results.

## Run Faker generation

The stored procedure `pgosm_flex_faker.point_in_place_landuse()` places points
There are two stored procedures in the `geo_faker` schema that
generate the fake stores and customers.


The stored procedure `geo_faker.point_in_place_landuse()` places points
along roads that are within (or nearby) specific `landuse` areas. The generated
data is available after calling the stored procedure in a temporary table
named `faker_store_location`.
The generated data is scoped to named places currently, though that will
likely become adjustable in the future.


The `geo_faker.point_in_place_landuse()` stored procedure requires
the `faker_store_location` temp table created by the first stored procedure.



```sql
CALL pgosm_flex_faker.point_in_place_landuse();
SELECT COUNT(*) FROM faker_store_location;
CALL geo_faker.point_in_place_landuse();
CALL geo_faker.points_around_point();
```


The following query saves the data in a new, non-temporary table named
`my_fake_stores`.






```sql
CREATE TABLE my_fake_stores AS
SELECT *
FROM faker_store_location
;

CREATE TABLE my_fake_customers AS
SELECT *
FROM faker_customer_location
;
```