rustprooflabs · rustprooflabs · Apr 22, 2023 · Apr 22, 2023 · Apr 22, 2023 · Apr 22, 2023
diff --git a/.github/workflows/deploy-book.yml b/.github/workflows/deploy-book.yml
@@ -0,0 +1,36 @@
+# Based on https://github.com/rust-lang/mdBook/wiki/Automated-Deployment%3A-GitHub-Actions
+# Builds the mdBook under docs/ and force-pushes the rendered site to the
+# gh-pages branch on every push to main.
+name: Deploy mdbook
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0
+    - name: Install mdbook
+      run: |
+        mkdir mdbook
+        # -f makes curl exit non-zero on an HTTP error instead of piping an
+        # error page into tar.
+        curl -sSfL https://github.com/rust-lang/mdBook/releases/download/v0.4.14/mdbook-v0.4.14-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=./mdbook
+        # Make the unpacked binary available to the following steps.
+        echo "$(pwd)/mdbook" >> "$GITHUB_PATH"
+    - name: Deploy GitHub Pages
+      run: |
+        # The book source lives in docs/; the rendered output is moved out of
+        # docs/book below.
+        cd docs
+        mdbook build
+        git worktree add gh-pages
+        git config user.name "Deploy from CI"
+        git config user.email ""
+        cd gh-pages
+        # Delete the ref to avoid keeping history.
+        git update-ref -d refs/heads/gh-pages
+        rm -rf *
+        mv ../book/* .
+        # CNAME is kept in docs/ and copied next to the rendered site.
+        mv ../CNAME .
+        git add .
+        git commit -m "Deploy $GITHUB_SHA to gh-pages"
+        git push --force --set-upstream origin gh-pages
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+docs/book/**
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,4 @@
+FROM rustprooflabs/pgosm-flex
+
+COPY ./db /app/faker/db
+
diff --git a/README.md b/README.md
@@ -5,49 +5,4 @@ components based on OpenStreetMap.  The use of OpenStreetMap data
 as a starting point provides a sense of realism. The use of `random()`
 to generate fake data avoids privacy concerns.
 
-## Load OpenStreetMap Data
-
-Load the region/subregion you want using the PgOSM Flex Docker image.
-The [Quick Start section](https://github.com/rustprooflabs/pgosm-flex#quick-start)
-loads the data into a PostGIS enabled database in the Docker container,
-available on port 5433.
-
-> Update instructions to use custom layerset.  Only need place, road, and land use.
-
-
-
-```bash
-mkdir ~/pgosm-data
-export POSTGRES_USER=postgres
-export POSTGRES_PASSWORD=mysecretpassword
-
-docker run --name pgosm -d --rm \
-    -v ~/pgosm-data:/app/output \
-    -v ~/git/pgosm-flex-faker/:/custom-layerset \
-    -v /etc/localtime:/etc/localtime:ro \
-    -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \
-    -p 5433:5432 -d rustprooflabs/pgosm-flex
-
-docker exec -it \
-    pgosm python3 docker/pgosm_flex.py \
-    --ram=8 \
-    --region=north-america/us \
-    --subregion=ohio \
-    --layerset=faker_layerset \
-    --layerset-path=/custom-layerset/ 
-```
-
-
-After loading, connect and run the `osm-faker.sql`.
-Each time running will generate slightly different results.
-
-
-Version 1
-
-![](osm-faker-stores-in-ohio-1.png)
-
-Version 2
-
-![](osm-faker-stores-in-ohio-2.png)
-
 
diff --git a/db/deploy/001.sql b/db/deploy/001.sql
@@ -5,17 +5,226 @@ BEGIN;
 CREATE SCHEMA pgosm_flex_faker;
 
 
-CREATE FUNCTION pgosm_flex_faker.location_in_place_landuse()
- RETURNS BOOLEAN
- LANGUAGE sql
- SECURITY DEFINER
- SET search_path TO 'pgosm_flex_faker, pg_temp'
+-- Generates fake store locations from loaded OpenStreetMap data.
+--
+-- Takes no parameters and returns nothing; results are left in temp tables.
+-- The final output is `faker_store_location`: a random point on the
+-- top-ranked road for each selected place.  Intermediate tables are also left
+-- in place for inspection: places_for_shops_1, places_for_shops, place_scores,
+-- selected, faker_place_polygon, road_osm_type_rank, selected_roads.
+--
+-- Reads: osm.place_polygon_nested, osm.landuse_polygon, osm.road_line and
+-- pgosm.road.  PostGIS functions are referenced through the `public` schema.
+--
+-- Selection uses random(), so each call produces different results.
+--
+-- To customize which landuse types are considered, create a temp table
+-- `place_osm_types (osm_type)` before calling; otherwise it is created here
+-- with 'retail' and 'commercial'.
+CREATE PROCEDURE pgosm_flex_faker.location_in_place_landuse()
+LANGUAGE plpgsql
 AS $$
+BEGIN
 
-	-- Do something
+	-- Define a custom `place_osm_types` table before executing to customize areas
+	CREATE TEMP TABLE IF NOT EXISTS place_osm_types AS
+	SELECT 'retail' AS osm_type
+	UNION ALL
+	SELECT 'commercial' AS osm_type
+	;
 
-	SELECT True;
 
+	-- Basic selection, provide attributes used to rank locations
+	DROP TABLE IF EXISTS places_for_shops_1;
+	CREATE TEMP TABLE places_for_shops_1 AS
+	WITH base AS (
+	SELECT osm_id, name, osm_type, admin_level, nest_level,
+			-- Rounding is assuming SRID 3857, or another unit in Meters or Feet.
+			ROUND(public.ST_Area(geom)::NUMERIC, 0) AS geom_area, geom
+		FROM osm.place_polygon_nested
+		-- Using innermost places to reduce likelihood of overlap
+		WHERE innermost
+			AND name <> ''
+			AND admin_level < 99
+	), with_space AS (
+	-- Within each Place, count and total the area of landuse polygons of the
+	-- types defined in place_osm_types that are "near" (intersect) it and,
+	-- of those, the ones it fully contains.
+	--
+	-- A single join is used on purpose.  Joining osm.landuse_polygon twice
+	-- (once for intersects, once for contains) pairs every near row with every
+	-- contained row, which multiplies the counts and summed areas.  Containment
+	-- implies intersection, so the contained_* values are FILTERed aggregates
+	-- over the same joined rows.
+	SELECT b.osm_id,
+			COUNT(lp.osm_id) AS near_areas,
+			COALESCE(SUM(public.ST_Area(lp.geom)), 0) AS near_space,
+			COUNT(lp.osm_id)
+				FILTER (WHERE public.ST_Contains(b.geom, lp.geom))
+				AS contained_areas,
+			COALESCE(SUM(public.ST_Area(lp.geom))
+				FILTER (WHERE public.ST_Contains(b.geom, lp.geom)), 0)
+				AS contained_space
+		FROM base b
+		LEFT JOIN osm.landuse_polygon lp
+			ON public.ST_Intersects(b.geom, lp.geom)
+				AND lp.osm_type IN (SELECT osm_type FROM place_osm_types)
+		GROUP BY b.osm_id
+	)
+	SELECT b.*, ws.contained_areas, ws.contained_space,
+			ws.near_areas, ws.near_space
+		FROM base b
+		INNER JOIN with_space ws ON b.osm_id = ws.osm_id
+	;
+
+
+	DROP TABLE IF EXISTS places_for_shops;
+	CREATE TEMP TABLE places_for_shops AS
+	SELECT osm_id, name, osm_type, admin_level, contained_areas, contained_space,
+			near_areas, near_space, geom_area,
+			-- geom_area is rounded to whole units above, so a very small place
+			-- can be 0; NULLIF yields a NULL ratio instead of a division error.
+			contained_space / NULLIF(geom_area, 0)
+				AS space_contained_ratio_higher_is_better,
+			near_space / NULLIF(geom_area, 0)
+				AS space_near_ratio_higher_is_better,
+			geom
+		FROM places_for_shops_1
+		ORDER BY space_contained_ratio_higher_is_better DESC NULLS LAST,
+				space_near_ratio_higher_is_better DESC NULLS LAST
+	;
+
+
+	/*
+	* The following scoring logic creates scores for each place depending
+	* on how its contained and nearby landuse data compare to the area's
+	* percentile values.
+	*/
+	DROP TABLE IF EXISTS place_scores;
+	CREATE TEMP TABLE place_scores AS
+	WITH breakpoints AS (
+	-- Calculate percentiles of space available across all available place inputs
+	-- This should let each region adjust for the input data
+	SELECT percentile_cont(0.25)
+				within group (order by contained_space asc)
+				as contained_space_25_perc,
+			percentile_cont(0.50)
+				within group (order by contained_space asc)
+				as contained_space_50_perc,
+			percentile_cont(0.90)
+				within group (order by near_space asc)
+				as near_space_90_perc
+		FROM places_for_shops
+		WHERE near_areas > 0
+	)
+	SELECT p.osm_id,
+			-- The score values themselves are arbitrary.  The key point is that
+			-- in the next step a place is kept only when its total score is
+			-- greater than a random() value (between 0.0 and 1.0), so increasing
+			-- the max score here results in some areas almost always being picked.
+			CASE WHEN b.contained_space_50_perc < p.contained_space
+					THEN .55
+				WHEN b.contained_space_25_perc < p.contained_space
+					THEN .35
+				ELSE .01
+			END AS contained_space_score,
+			CASE WHEN b.near_space_90_perc < p.near_space
+					THEN .1
+				ELSE .01
+			END AS near_space_score
+		FROM places_for_shops p
+		INNER JOIN breakpoints b ON True
+		-- Excludes places that aren't even nearby (intersects) an appropriate
+		-- place type
+		WHERE p.near_areas > 0
+	;
+
+
+	-- Randomly keep places: the higher the total score, the more likely
+	-- it exceeds the random draw.
+	DROP TABLE IF EXISTS selected;
+	CREATE TEMP TABLE selected AS
+	WITH a AS (
+	SELECT p.osm_id,
+			s.contained_space_score + s.near_space_score
+				AS total_score,
+			random() as rnd
+		FROM places_for_shops p
+		INNER JOIN place_scores s
+			ON p.osm_id = s.osm_id
+	)
+	SELECT a.osm_id
+		FROM a  
+		WHERE a.total_score > a.rnd
+	;
+
+
+	-- Selected areas to put points into.
+	DROP TABLE IF EXISTS faker_place_polygon; 
+	CREATE TEMP TABLE faker_place_polygon AS
+	SELECT p.*
+		FROM selected s
+		INNER JOIN places_for_shops p ON s.osm_id = p.osm_id
+		ORDER BY p.name
+	;
+
+	CREATE INDEX gix_faker_place_polygon
+		ON faker_place_polygon USING GIST (geom)
+	;
+
+
+	/*
+		Ranking roads by osm_type with goal of scoring roads with lower speed
+		limits higher.  Uses helper table loaded by PgOSM Flex.
+
+		Uses window function for rank steps, then normalize to 0-1 range.
+		Finally, squishes range into 0.05 - 0.90 to prevent guarantees of
+		never or always included.
+	*/ 
+	DROP TABLE IF EXISTS road_osm_type_rank;
+	CREATE TEMP TABLE road_osm_type_rank AS
+	WITH rank_lower_speed_better AS (
+	SELECT osm_type, maxspeed_mph,
+			RANK() OVER  (ORDER BY maxspeed_mph desc) AS rnk_raw
+		FROM pgosm.road
+		WHERE route_motor
+			AND osm_type NOT LIKE '%link'
+	), aggs_for_normalization AS (
+	SELECT MIN(rnk_raw) AS min_rnk, MAX(rnk_raw) AS max_rnk
+		FROM rank_lower_speed_better
+	), normal_rnk AS (
+	SELECT r.osm_type, r.maxspeed_mph,
+			-- When every road type shares one rank the range is 0; fall back
+			-- to a neutral 0.5 instead of failing with division by zero.
+			COALESCE(
+				(rnk_raw * 1.0 - min_rnk) / NULLIF(max_rnk - min_rnk, 0),
+				0.5)
+				AS normalized_rnk
+		FROM rank_lower_speed_better r
+		INNER JOIN aggs_for_normalization ON True
+	)
+	SELECT osm_type, maxspeed_mph,
+			CASE WHEN normalized_rnk < 0.05 THEN 0.05
+				WHEN normalized_rnk > 0.9 THEN .9
+				ELSE normalized_rnk
+				END AS normalized_rnk
+		FROM normal_rnk
+	;
+
+
+	-----------------------------------------
+	-- Identify roads where a building could be
+	-- Not using actual buildings / addresses because:
+	---- a) privacy
+	---- b) coverage
+	--
+	-- Candidate roads are those whose bounding box overlaps (&&) a landuse
+	-- polygon contained in a selected place.  One road is kept per place:
+	-- the best road type score, with ties broken randomly.
+	DROP TABLE IF EXISTS selected_roads ;
+	CREATE TEMP TABLE selected_roads AS
+	WITH road_ranks AS (
+	SELECT p.osm_id AS place_osm_id, p.name AS place_name,
+			rr.normalized_rnk AS road_type_score,
+			r.osm_id AS road_osm_id
+		FROM faker_place_polygon p
+		INNER JOIN osm.landuse_polygon c
+			ON public.ST_Contains(p.geom, c.geom)
+				AND c.osm_type IN (SELECT osm_type FROM place_osm_types)
+		INNER JOIN osm.road_line r
+			ON c.geom && r.geom
+				AND r.route_motor
+				AND r.osm_type NOT IN ('service')
+				AND r.osm_type NOT LIKE '%link'
+		INNER JOIN road_osm_type_rank rr
+			ON r.osm_type = rr.osm_type
+	), ranked AS (
+	SELECT *,
+			ROW_NUMBER() OVER (
+				PARTITION BY place_osm_id
+				ORDER BY road_type_score DESC, random()) AS rnk
+		FROM road_ranks
+	)
+	SELECT *
+		FROM ranked
+		WHERE rnk = 1
+		;
+
+
+	-- Final output: a point at a random fraction along each selected road.
+	-- NOTE(review): ST_LineInterpolatePoint needs a single LineString; if
+	-- ST_LineMerge cannot merge a road into one line this will presumably
+	-- raise an error -- confirm osm.road_line only holds mergeable lines.
+	DROP TABLE IF EXISTS faker_store_location;
+	CREATE TEMP TABLE faker_store_location AS
+	SELECT a.place_osm_id, a.place_name, a.road_osm_id,
+			r.osm_type, r.name, r.ref,
+			public.ST_LineInterpolatePoint(public.ST_LineMerge(r.geom), random()) AS geom
+		FROM selected_roads a
+		INNER JOIN osm.road_line r ON a.road_osm_id = r.osm_id
+	;
+
+
+END
+$$
+;
 

diff --git a/docs/CNAME b/docs/CNAME
@@ -0,0 +1 @@
+faker.pgosm-flex.com
diff --git a/docs/book.toml b/docs/book.toml
@@ -0,0 +1,6 @@
+[book]
+authors = ['Ryan Lambert', 'RustProof Labs']
+language = "en"
+multilingual = false
+src = "src"
+title = "PgOSM Flex Faker"
diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
@@ -0,0 +1,4 @@
+# Summary
+
+- [What is PgOSM Flex Faker?](pgosm-flex-faker.md)
+- [Quick Start](quick-start.md)
diff --git a/osm-faker-stores-in-ohio-1.png → docs/src/osm-faker-stores-in-ohio-1.png b/osm-faker-stores-in-ohio-1.png → docs/src/osm-faker-stores-in-ohio-1.png
diff --git a/osm-faker-stores-in-ohio-2.png → docs/src/osm-faker-stores-in-ohio-2.png b/osm-faker-stores-in-ohio-2.png → docs/src/osm-faker-stores-in-ohio-2.png
diff --git a/docs/src/pgosm-flex-faker.md b/docs/src/pgosm-flex-faker.md
@@ -0,0 +1,17 @@
+# What is PgOSM Flex Faker?
+
+The project creates fake store and customer data with geospatial
+components based on OpenStreetMap.  The use of OpenStreetMap data
+as a starting point provides a sense of realism. The use of `random()`
+to generate fake data avoids privacy concerns.
+
+Version 1
+
+![Generated fake store locations in Ohio, version 1](osm-faker-stores-in-ohio-1.png)
+
+Version 2
+
+![Generated fake store locations in Ohio, version 2](osm-faker-stores-in-ohio-2.png)
+
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		FROM rustprooflabs/pgosm-flex

		COPY ./db /app/faker/db