Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/deploy-book.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Based on https://github.com/rust-lang/mdBook/wiki/Automated-Deployment%3A-GitHub-Actions
#
# On every push to main: build the mdBook in docs/ and force-push the rendered
# site (plus the CNAME file) to the gh-pages branch as a single commit.
name: Deploy mdbook
on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    # The last step pushes to gh-pages with the workflow token, which needs
    # write access to repository contents. Repositories whose default token
    # permission is read-only would otherwise fail at `git push`.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3
        with:
          # NOTE(review): presumably a full fetch so that `git worktree add`
          # below can see an existing remote gh-pages branch -- confirm.
          fetch-depth: 0
      - name: Install mdbook
        run: |
          mkdir mdbook
          # --fail makes curl exit non-zero on an HTTP error instead of piping
          # an error page into tar.
          curl -fsSL https://github.com/rust-lang/mdBook/releases/download/v0.4.14/mdbook-v0.4.14-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=./mdbook
          # Make the mdbook binary available to later steps.
          echo "$(pwd)/mdbook" >> "$GITHUB_PATH"
      - name: Deploy GitHub Pages
        run: |
          # Just add a `cd` here if you need to change to another directory.
          cd docs
          mdbook build
          git worktree add gh-pages
          git config user.name "Deploy from CI"
          git config user.email ""
          cd gh-pages
          # Delete the ref to avoid keeping history.
          git update-ref -d refs/heads/gh-pages
          # `*` does not match dotfiles, so the worktree's .git link survives.
          rm -rf *
          mv ../book/* .
          mv ../CNAME .
          git add .
          git commit -m "Deploy $GITHUB_SHA to gh-pages"
          git push --force --set-upstream origin gh-pages
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docs/book/**
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build on top of the PgOSM Flex image.
FROM rustprooflabs/pgosm-flex

# Copy this repository's ./db directory into the image at /app/faker/db.
COPY ./db /app/faker/db

45 changes: 0 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,49 +5,4 @@ components based on OpenStreetMap. The use of OpenStreetMap data
as a starting point provides a sense of realism. The use of `random()`
to generate fake data avoids privacy concerns.

## Load OpenStreetMap Data

Load the region/subregion you want using the PgOSM Flex Docker image.
The [Quick Start section](https://github.com/rustprooflabs/pgosm-flex#quick-start)
loads the data into a PostGIS enabled database in the Docker container,
available on port 5433.

> Update instructions to use custom layerset. Only need place, road, and land use.



```bash
# Directory on the host that is mounted at /app/output in the container.
# -p keeps the command from failing when the directory already exists.
mkdir -p ~/pgosm-data
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=mysecretpassword

# Start the PgOSM Flex container in the background; Postgres is published on
# host port 5433. This repository is mounted at /custom-layerset so the
# faker layerset can be passed to pgosm_flex.py below.
docker run --name pgosm -d --rm \
    -v ~/pgosm-data:/app/output \
    -v ~/git/pgosm-flex-faker/:/custom-layerset \
    -v /etc/localtime:/etc/localtime:ro \
    -e POSTGRES_USER="$POSTGRES_USER" \
    -e POSTGRES_PASSWORD="$POSTGRES_PASSWORD" \
    -p 5433:5432 rustprooflabs/pgosm-flex

# Load the Ohio subregion using the custom layerset.
docker exec -it \
    pgosm python3 docker/pgosm_flex.py \
    --ram=8 \
    --region=north-america/us \
    --subregion=ohio \
    --layerset=faker_layerset \
    --layerset-path=/custom-layerset/
```


After loading, connect to the database and run `osm-faker.sql`.
Each run generates slightly different results.


Version 1

![](osm-faker-stores-in-ohio-1.png)

Version 2

![](osm-faker-stores-in-ohio-2.png)


223 changes: 216 additions & 7 deletions db/deploy/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,226 @@ BEGIN;
CREATE SCHEMA pgosm_flex_faker;


-- Generates fake store locations from OpenStreetMap data loaded by PgOSM Flex.
--
-- Reads:  osm.place_polygon_nested, osm.landuse_polygon, osm.road_line,
--         pgosm.road, and the temp table place_osm_types (created with
--         defaults below when the caller has not defined it).
-- Writes: temp tables places_for_shops_1, places_for_shops, place_scores,
--         selected, faker_place_polygon, road_osm_type_rank, selected_roads
--         and faker_store_location. faker_store_location holds the final
--         result: a random point on the one road picked for each selected
--         place.
--
-- Takes no parameters. Results depend on random(), so every call produces
-- a different set of locations.
CREATE PROCEDURE pgosm_flex_faker.location_in_place_landuse()
LANGUAGE plpgsql
AS $$
BEGIN

    -- Define a custom `place_osm_types` table before executing to customize areas
    CREATE TEMP TABLE IF NOT EXISTS place_osm_types AS
    SELECT 'retail' AS osm_type
    UNION ALL
    SELECT 'commercial' AS osm_type
    ;


    -- Basic selection, provide attributes used to rank locations
    DROP TABLE IF EXISTS places_for_shops_1;
    CREATE TEMP TABLE places_for_shops_1 AS
    WITH base AS (
        SELECT osm_id, name, osm_type, admin_level, nest_level,
                -- Rounding is assuming SRID 3857, or another unit in Meters or Feet.
                ROUND(public.ST_Area(geom)::NUMERIC, 0) AS geom_area, geom
            FROM osm.place_polygon_nested
            -- Using innermost places to reduce likelihood of overlap
            WHERE innermost
                AND name <> ''
                AND admin_level < 99
    ), with_space AS (
        -- Within each Place, find how many places are "near" (intersects)
        -- or contain the types of places (commercial, retail, residential, etc)
        -- defined in place_osm_types.
        --
        -- Landuse is joined once, on ST_Intersects. Joining it a second time
        -- for ST_Contains multiplies the rows per place (near x contained)
        -- and inflates every COUNT / SUM. Contained polygons are a subset of
        -- the intersecting ones, so they are picked out with FILTER instead.
        SELECT b.osm_id,
                COUNT(lp.osm_id) AS near_areas,
                COALESCE(SUM(public.ST_Area(lp.geom)), 0) AS near_space,
                COUNT(lp.osm_id)
                    FILTER (WHERE public.ST_Contains(b.geom, lp.geom))
                    AS contained_areas,
                COALESCE(
                    SUM(public.ST_Area(lp.geom))
                        FILTER (WHERE public.ST_Contains(b.geom, lp.geom)),
                    0) AS contained_space
            FROM base b
            LEFT JOIN osm.landuse_polygon lp
                ON public.ST_Intersects(b.geom, lp.geom)
                    AND lp.osm_type IN (SELECT osm_type FROM place_osm_types)
            GROUP BY b.osm_id
    )
    SELECT b.*, ws.contained_areas, ws.contained_space,
            ws.near_areas, ws.near_space
        FROM base b
        INNER JOIN with_space ws ON b.osm_id = ws.osm_id
    ;


    -- Adds the ratio of contained / nearby landuse area to the place's own area.
    -- geom_area is rounded to a whole number and can therefore be 0; NULLIF
    -- turns that into a NULL ratio instead of a division-by-zero error.
    DROP TABLE IF EXISTS places_for_shops;
    CREATE TEMP TABLE places_for_shops AS
    SELECT osm_id, name, osm_type, admin_level, contained_areas, contained_space,
            near_areas, near_space, geom_area,
            contained_space / NULLIF(geom_area, 0)
                AS space_contained_ratio_higher_is_better,
            near_space / NULLIF(geom_area, 0)
                AS space_near_ratio_higher_is_better,
            geom
        FROM places_for_shops_1
        ORDER BY space_contained_ratio_higher_is_better DESC NULLS LAST,
            space_near_ratio_higher_is_better DESC NULLS LAST
    ;


    /*
     * The following scoring logic creates scores for each place depending
     * on how its contained and nearby landuse data compare to the area's
     * percentile values.
     */
    DROP TABLE IF EXISTS place_scores;
    CREATE TEMP TABLE place_scores AS
    WITH breakpoints AS (
        -- Calculate percentiles of space available across all available place inputs
        -- This should let each region adjust for the input data
        SELECT percentile_cont(0.25)
                    within group (order by contained_space asc)
                    as contained_space_25_perc,
                percentile_cont(0.50)
                    within group (order by contained_space asc)
                    as contained_space_50_perc,
                percentile_cont(0.90)
                    within group (order by near_space asc)
                    as near_space_90_perc
            FROM places_for_shops
            WHERE near_areas > 0
    )
    SELECT p.osm_id,
            -- Actual ranking is arbitrary, the key is understanding that scores
            -- are compared against a random value in the next step (where
            -- random is between 0.0 and 1.0), so increasing the max score here
            -- results in some areas almost always being picked
            CASE WHEN b.contained_space_50_perc < p.contained_space
                    THEN .55
                WHEN b.contained_space_25_perc < p.contained_space
                    THEN .35
                ELSE .01
            END AS contained_space_score,
            CASE WHEN b.near_space_90_perc < p.near_space
                    THEN .1
                ELSE .01
            END AS near_space_score
        FROM places_for_shops p
        -- breakpoints is a single row; attach it to every place
        INNER JOIN breakpoints b ON True
        -- Excludes places that aren't even nearby (intersects) an appropriate
        -- place type
        WHERE p.near_areas > 0
    ;


    -- Randomly keep places: a place is selected when its total score is
    -- greater than a random value drawn for that place.
    DROP TABLE IF EXISTS selected;
    CREATE TEMP TABLE selected AS
    WITH a AS (
        SELECT p.osm_id,
                s.contained_space_score + s.near_space_score
                    AS total_score,
                random() as rnd
            FROM places_for_shops p
            INNER JOIN place_scores s
                ON p.osm_id = s.osm_id
    )
    SELECT a.osm_id
        FROM a
        WHERE a.total_score > a.rnd
    ;


    -- Selected areas to put points into.
    DROP TABLE IF EXISTS faker_place_polygon;
    CREATE TEMP TABLE faker_place_polygon AS
    SELECT p.*
        FROM selected s
        INNER JOIN places_for_shops p ON s.osm_id = p.osm_id
        ORDER BY p.name
    ;

    CREATE INDEX gix_faker_place_polygon
        ON faker_place_polygon USING GIST (geom)
    ;


    /*
        Ranking roads by osm_type with goal of scoring roads with lower speed
        limits higher. Uses helper table loaded by PgOSM Flex.

        Uses window function for rank steps, then normalize to 0-1 range.
        Finally, squishes range into 0.05 - 0.90 to prevent guarantees of
        never or always included.
    */
    DROP TABLE IF EXISTS road_osm_type_rank;
    CREATE TEMP TABLE road_osm_type_rank AS
    WITH rank_lower_speed_better AS (
        SELECT osm_type, maxspeed_mph,
                RANK() OVER (ORDER BY maxspeed_mph desc) AS rnk_raw
            FROM pgosm.road
            WHERE route_motor
                AND osm_type NOT LIKE '%link'
    ), aggs_for_normalization AS (
        SELECT MIN(rnk_raw) AS min_rnk, MAX(rnk_raw) AS max_rnk
            FROM rank_lower_speed_better
    ), normal_rnk AS (
        SELECT r.osm_type, r.maxspeed_mph,
                -- When every road type shares one rank, max_rnk = min_rnk.
                -- NULLIF yields a NULL score in that case instead of raising
                -- a division-by-zero error.
                (rnk_raw * 1.0 - min_rnk) / NULLIF(max_rnk - min_rnk, 0)
                    AS normalized_rnk
            FROM rank_lower_speed_better r
            JOIN aggs_for_normalization ON True
    )
    SELECT osm_type, maxspeed_mph,
            CASE WHEN normalized_rnk < 0.05 THEN 0.05
                WHEN normalized_rnk > 0.9 THEN .9
                ELSE normalized_rnk
            END AS normalized_rnk
        FROM normal_rnk
    ;


    -----------------------------------------
    -- Identify roads where a building could be
    -- Not using actual buildings / addresses because:
    ---- a) privacy
    ---- b) coverage
    DROP TABLE IF EXISTS selected_roads ;
    CREATE TEMP TABLE selected_roads AS
    WITH road_ranks AS (
        SELECT p.osm_id AS place_osm_id, p.name AS place_name,
                rr.normalized_rnk AS road_type_score,
                r.osm_id AS road_osm_id
            FROM faker_place_polygon p
            INNER JOIN osm.landuse_polygon c
                ON public.ST_Contains(p.geom, c.geom)
                    AND c.osm_type IN (SELECT osm_type FROM place_osm_types)
            -- `&&` matches on bounding-box overlap only, not exact geometry
            INNER JOIN osm.road_line r
                ON c.geom && r.geom
                    AND r.route_motor
                    AND r.osm_type NOT IN ('service')
                    AND r.osm_type NOT LIKE '%link'
            INNER JOIN road_osm_type_rank rr
                ON r.osm_type = rr.osm_type
    ), ranked AS (
        -- Best-scoring road first within each place; random() breaks ties
        SELECT *,
                ROW_NUMBER() OVER (
                    PARTITION BY place_osm_id
                    ORDER BY road_type_score DESC, random()) AS rnk
            FROM road_ranks
    )
    SELECT *
        FROM ranked
        WHERE rnk = 1
    ;


    -- Final output: a point at a random fraction along each selected road.
    DROP TABLE IF EXISTS faker_store_location;
    CREATE TEMP TABLE faker_store_location AS
    SELECT a.place_osm_id, a.place_name, a.road_osm_id,
            r.osm_type, r.name, r.ref,
            public.ST_LineInterpolatePoint(public.ST_LineMerge(r.geom), random()) AS geom
        FROM selected_roads a
        INNER JOIN osm.road_line r ON a.road_osm_id = r.osm_id
    ;


END
$$
;

Expand Down
1 change: 1 addition & 0 deletions docs/CNAME
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
faker.pgosm-flex.com
6 changes: 6 additions & 0 deletions docs/book.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[book]
authors = ['Ryan Lambert', 'RustProof Labs']
language = "en"
multilingual = false
src = "src"
title = "PgOSM Flex Faker"
4 changes: 4 additions & 0 deletions docs/src/SUMMARY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Summary

- [What is PgOSM Flex Faker?](pgosm-flex-faker.md)
- [Quick Start](quick-start.md)
17 changes: 17 additions & 0 deletions docs/src/pgosm-flex-faker.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# What is PgOSM Flex Faker?

The project creates fake store and customer data with geospatial
components based on OpenStreetMap. The use of OpenStreetMap data
as a starting point provides a sense of realism. The use of `random()`
to generate fake data avoids privacy concerns.

Version 1

![Fake store locations in Ohio, version 1](osm-faker-stores-in-ohio-1.png)

Version 2

![Fake store locations in Ohio, version 2](osm-faker-stores-in-ohio-2.png)



Loading