From 5b0cc0ec60658af1564622054b5d4a6402b9ed34 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 31 Mar 2023 19:19:37 -0600 Subject: [PATCH 1/3] Moving and restructuring QGIS style code. Fix broken tests for replication/uuid env vars --- docker/db.py | 56 ++------------------- docker/pgosm_flex.py | 17 +++++-- docker/qgis_styles.py | 88 +++++++++++++++++++++++++++++++++ docker/tests/test_geofabrik.py | 11 ++++- docker/tests/test_pgosm_flex.py | 13 +++-- 5 files changed, 123 insertions(+), 62 deletions(-) create mode 100644 docker/qgis_styles.py diff --git a/docker/db.py b/docker/db.py index 5c5264c..2d5cf9d 100644 --- a/docker/db.py +++ b/docker/db.py @@ -8,6 +8,7 @@ import psycopg import sh +import qgis_styles LOGGER = logging.getLogger('pgosm-flex') @@ -216,7 +217,8 @@ def prepare_pgosm_db(data_only, db_path, import_mode): if not data_only: LOGGER.info('Loading extras via Sqitch plus QGIS styles.') run_sqitch_prep(db_path) - load_qgis_styles(db_path) + qgis_styles.load_qgis_styles(db_path=db_path, + db_name=pg_conn_parts()['pg_db']) else: LOGGER.info('Data only mode enabled, no Sqitch or QGIS styles.') @@ -366,58 +368,6 @@ def run_sqitch_prep(db_path): return True -def load_qgis_styles(db_path): - """Loads QGIS style data for easy formatting of most common layers. 
- - Parameters - ------------------------- - db_path : str - """ - LOGGER.info('Load QGIS styles...') - # These two paths can easily be ran via psycopg - create_path = os.path.join(db_path, - 'qgis-style', - 'create_layer_styles.sql') - load_path = os.path.join(db_path, - 'qgis-style', - '_load_layer_styles.sql') - - with open(create_path, 'r') as file_in: - create_sql = file_in.read() - - with open(load_path, 'r') as file_in: - load_sql = file_in.read() - - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: - cur = conn.cursor() - cur.execute(create_sql) - LOGGER.debug('QGIS Style table created') - - # Loading layer_styles data is done from files created by pg_dump, using - # psql to reload is easiest - conn_string = os.environ['PGOSM_CONN'] - cmds_populate = ['psql', '-d', conn_string, - '-f', 'qgis-style/layer_styles.sql'] - - output = subprocess.run(cmds_populate, - text=True, - capture_output=True, - cwd=db_path, - check=False) - - LOGGER.debug(f'Output from loading QGIS style data: {output.stdout}') - - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: - cur = conn.cursor() - cur.execute(load_sql) - LOGGER.info('QGIS Style table populated') - - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: - sql_clean = 'DELETE FROM public.layer_styles_staging;' - cur = conn.cursor() - cur.execute(sql_clean) - LOGGER.debug('QGIS Style staging table cleaned') - def sqitch_db_string(): """Returns DB string used for Sqitch. diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 25f1632..df414b7 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -141,7 +141,8 @@ def run_pgosm_flex(ram, region, subregion, data_only, debug, flex_path=paths['flex_path'], ram=ram, skip_nested=skip_nested, - import_mode=import_mode) + import_mode=import_mode, + debug=debug) if not success: msg = 'PgOSM Flex completed with errors. 
Details in output' @@ -162,7 +163,7 @@ def run_pgosm_flex(ram, region, subregion, data_only, debug, def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, - import_mode): + import_mode, debug): """Runs standard osm2pgsql command and optionally inits for replication (osm2pgsql-replication) mode. @@ -174,6 +175,7 @@ def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, ram : float skip_nested : boolean import_mode : import_mode.ImportMode + debug : boolean Returns --------------------------- @@ -193,7 +195,8 @@ def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, out_path=out_path, import_mode=import_mode) - run_osm2pgsql(osm2pgsql_command=osm2pgsql_command, flex_path=flex_path) + run_osm2pgsql(osm2pgsql_command=osm2pgsql_command, flex_path=flex_path, + debug=debug) if not skip_nested: skip_nested = check_layerset_places(flex_path) @@ -377,17 +380,23 @@ def get_export_full_path(out_path, export_filename): return export_path -def run_osm2pgsql(osm2pgsql_command, flex_path): +def run_osm2pgsql(osm2pgsql_command, flex_path, debug): """Runs the provided osm2pgsql command. Parameters ---------------------- osm2pgsql_command : str flex_path : str + debug : boolean """ logger = logging.getLogger('pgosm-flex') logger.info('Running osm2pgsql') + if debug: + print() + print(osm2pgsql_command) + print() + returncode = helpers.run_command_via_subprocess(cmd=osm2pgsql_command.split(), cwd=flex_path) diff --git a/docker/qgis_styles.py b/docker/qgis_styles.py new file mode 100644 index 0000000..2c4b1ac --- /dev/null +++ b/docker/qgis_styles.py @@ -0,0 +1,88 @@ +"""PgOSM Flex module to handle loading QGIS styles to Postgres. +""" +import logging +import os +import subprocess + +import db + + +LOGGER = logging.getLogger('pgosm-flex') + + +def load_qgis_styles(db_path, db_name): + """Loads QGIS style data for easy formatting of most common layers. 
+ + Parameters + ------------------------- + db_path : str + Base path to pgosm-flex/db directory + db_name : str + """ + LOGGER.info(f'Load QGIS styles to database {db_name}...') + conn_string = os.environ['PGOSM_CONN'] + # These two paths can easily be ran via psycopg + + create_layer_style_table(db_path=db_path, conn_string=conn_string) + populate_layer_style_staging(db_path=db_path, conn_string=conn_string) + + load_path = os.path.join(db_path, + 'qgis-style', + '_load_layer_styles.sql') + + with open(load_path, 'r') as file_in: + load_sql = file_in.read() + + + with db.get_db_conn(conn_string=conn_string) as conn: + cur = conn.cursor() + cur.execute(load_sql) + LOGGER.info('QGIS Style table populated') + + with db.get_db_conn(conn_string=conn_string) as conn: + sql_clean = 'DELETE FROM public.layer_styles_staging;' + cur = conn.cursor() + cur.execute(sql_clean) + LOGGER.debug('QGIS Style staging table cleaned') + + +def create_layer_style_table(db_path, conn_string): + """Ensures QGIS layer styles table exists. 
+ + Parameters + -------------------- + db_path : str + conn_string : path + """ + create_path = os.path.join(db_path, + 'qgis-style', + 'create_layer_styles.sql') + + with open(create_path, 'r') as file_in: + create_sql = file_in.read() + + with db.get_db_conn(conn_string=conn_string) as conn: + cur = conn.cursor() + cur.execute(create_sql) + LOGGER.debug('QGIS Style table created') + + +def populate_layer_style_staging(db_path, conn_string): + """ + Parameters + -------------------- + db_path : str + conn_string : path + """ + # Loading layer_styles data is done from files created by pg_dump, using + # psql to reload is easiest + cmds_populate = ['psql', '-d', conn_string, + '-f', 'qgis-style/layer_styles.sql'] + + output = subprocess.run(cmds_populate, + text=True, + capture_output=True, + cwd=db_path, + check=False) + + LOGGER.debug(f'Output from loading QGIS style data: {output.stdout}') diff --git a/docker/tests/test_geofabrik.py b/docker/tests/test_geofabrik.py index 5b9783e..b6d477e 100644 --- a/docker/tests/test_geofabrik.py +++ b/docker/tests/test_geofabrik.py @@ -1,5 +1,7 @@ """ Unit tests to cover the Geofabrik module.""" import unittest +import uuid + import geofabrik, helpers REGION_US = 'north-america/us' @@ -7,6 +9,7 @@ LAYERSET = 'default' PGOSM_DATE = '2021-12-02' +IMPORT_UUID = uuid.uuid4() class GeofabrikTests(unittest.TestCase): @@ -18,7 +21,9 @@ def setUp(self): pgosm_date=PGOSM_DATE, layerset=LAYERSET, layerset_path=None, - sp_gist=False) + sp_gist=False, + replication=False, + import_uuid=IMPORT_UUID) def tearDown(self): @@ -39,7 +44,9 @@ def test_get_region_filename_returns_region_when_subregion_None(self): pgosm_date=PGOSM_DATE, layerset=LAYERSET, layerset_path=None, - sp_gist=False) + sp_gist=False, + replication=False, + import_uuid=IMPORT_UUID) result = geofabrik.get_region_filename() expected = f'{REGION_US}-latest.osm.pbf' diff --git a/docker/tests/test_pgosm_flex.py b/docker/tests/test_pgosm_flex.py index 73d6e70..c4a422d 100644 --- 
a/docker/tests/test_pgosm_flex.py +++ b/docker/tests/test_pgosm_flex.py @@ -1,12 +1,15 @@ """ Unit tests to cover the DB module.""" import unittest +import uuid + import pgosm_flex, helpers + REGION_US = 'north-america/us' SUBREGION_DC = 'district-of-columbia' LAYERSET = 'default' PGOSM_DATE = '2021-12-02' - +IMPORT_UUID = uuid.uuid4() class PgOSMFlexTests(unittest.TestCase): @@ -18,7 +21,9 @@ def setUp(self): pgosm_date=PGOSM_DATE, layerset=LAYERSET, layerset_path=None, - sp_gist=False) + sp_gist=False, + replication=False, + import_uuid=IMPORT_UUID) def tearDown(self): @@ -89,7 +94,9 @@ def test_get_export_filename_region_only(self): pgosm_date=PGOSM_DATE, layerset=LAYERSET, layerset_path=None, - sp_gist=False) + sp_gist=False, + replication=False, + import_uuid=IMPORT_UUID) input_file = None result = pgosm_flex.get_export_filename(input_file) From ce7e5c6cdc77f8d92d36b6fed9e3a9e9cadd3db5 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 31 Mar 2023 19:53:13 -0600 Subject: [PATCH 2/3] Finish restructuring original code, add update to handle different DB and/or schema. --- docker/db.py | 6 ++- docker/pgosm_flex.py | 3 +- docker/qgis_styles.py | 86 ++++++++++++++++++++++++++++++++----------- 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/docker/db.py b/docker/db.py index 2d5cf9d..b4eff8f 100644 --- a/docker/db.py +++ b/docker/db.py @@ -182,7 +182,7 @@ def pg_isready(): return True -def prepare_pgosm_db(data_only, db_path, import_mode): +def prepare_pgosm_db(data_only, db_path, import_mode, schema_name): """Runs through series of steps to prepare database for PgOSM. 
Parameters @@ -190,6 +190,7 @@ def prepare_pgosm_db(data_only, db_path, import_mode): data_only : bool db_path : str import_mode : import_mode.ImportMode + schema_name : str """ if pg_conn_parts()['pg_host'] == 'localhost': drop_it = True @@ -218,7 +219,8 @@ def prepare_pgosm_db(data_only, db_path, import_mode): LOGGER.info('Loading extras via Sqitch plus QGIS styles.') run_sqitch_prep(db_path) qgis_styles.load_qgis_styles(db_path=db_path, - db_name=pg_conn_parts()['pg_db']) + db_name=pg_conn_parts()['pg_db'], + schema_name=schema_name) else: LOGGER.info('Data only mode enabled, no Sqitch or QGIS styles.') diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index df414b7..0ea3e45 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -123,7 +123,8 @@ def run_pgosm_flex(ram, region, subregion, data_only, debug, db.prepare_pgosm_db(data_only=data_only, db_path=paths['db_path'], - import_mode=import_mode) + import_mode=import_mode, + schema_name=schema_name) if import_mode.replication_update: # If replication_update, a manual date is not valid. diff --git a/docker/qgis_styles.py b/docker/qgis_styles.py index 2c4b1ac..9b767a8 100644 --- a/docker/qgis_styles.py +++ b/docker/qgis_styles.py @@ -10,7 +10,7 @@ LOGGER = logging.getLogger('pgosm-flex') -def load_qgis_styles(db_path, db_name): +def load_qgis_styles(db_path, db_name, schema_name): """Loads QGIS style data for easy formatting of most common layers. 
Parameters @@ -18,32 +18,16 @@ def load_qgis_styles(db_path, db_name): db_path : str Base path to pgosm-flex/db directory db_name : str + schema_name : str """ LOGGER.info(f'Load QGIS styles to database {db_name}...') conn_string = os.environ['PGOSM_CONN'] - # These two paths can easily be ran via psycopg create_layer_style_table(db_path=db_path, conn_string=conn_string) populate_layer_style_staging(db_path=db_path, conn_string=conn_string) - - load_path = os.path.join(db_path, - 'qgis-style', - '_load_layer_styles.sql') - - with open(load_path, 'r') as file_in: - load_sql = file_in.read() - - - with db.get_db_conn(conn_string=conn_string) as conn: - cur = conn.cursor() - cur.execute(load_sql) - LOGGER.info('QGIS Style table populated') - - with db.get_db_conn(conn_string=conn_string) as conn: - sql_clean = 'DELETE FROM public.layer_styles_staging;' - cur = conn.cursor() - cur.execute(sql_clean) - LOGGER.debug('QGIS Style staging table cleaned') + update_styles_db_name(db_name=db_name, schema_name=schema_name, + conn_string=conn_string) + load_staging_to_prod(db_path=db_path, conn_string=conn_string) def create_layer_style_table(db_path, conn_string): @@ -68,7 +52,8 @@ def create_layer_style_table(db_path, conn_string): def populate_layer_style_staging(db_path, conn_string): - """ + """Loads data to public.layer_styles_staging using psql + Parameters -------------------- db_path : str @@ -86,3 +71,60 @@ def populate_layer_style_staging(db_path, conn_string): check=False) LOGGER.debug(f'Output from loading QGIS style data: {output.stdout}') + + +def load_staging_to_prod(db_path, conn_string): + """Loads data from public.layer_styles_staging to public.layer_styles. 
+ + Parameters + -------------------- + db_path : str + conn_string : path + """ + load_path = os.path.join(db_path, + 'qgis-style', + '_load_layer_styles.sql') + + with open(load_path, 'r') as file_in: + load_sql = file_in.read() + + with db.get_db_conn(conn_string=conn_string) as conn: + cur = conn.cursor() + cur.execute(load_sql) + + LOGGER.info('QGIS Style table populated') + + with db.get_db_conn(conn_string=conn_string) as conn: + sql_clean = 'DELETE FROM public.layer_styles_staging;' + cur = conn.cursor() + cur.execute(sql_clean) + + LOGGER.debug('QGIS Style staging table cleaned') + + +def update_styles_db_name(db_name, schema_name, conn_string): + """ + Parameters + ---------------------- + db_name : str + schema_name : str + conn_string : str + """ + if db_name == 'pgosm' and schema_name == 'osm': + LOGGER.debug('Database name and schema name set to defaults. Update to layer styles not necessary') + return + + sql_raw = """ +UPDATE public.layer_styles_staging + SET f_table_catalog = %(db_name)s , + f_table_schema = %(schema_name)s +; +""" + params = {'db_name': db_name, 'schema_name': schema_name} + with db.get_db_conn(conn_string=conn_string) as conn: + cur = conn.cursor() + cur.execute(sql_raw, params=params) + conn.commit() + + LOGGER.info(f'Updated QGIS layer styles for {db_name}.{schema_name}') + From 10a38ac6c7533471f6e94a4976bbcffeadff00d7 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 31 Mar 2023 20:14:23 -0600 Subject: [PATCH 3/3] Various documentation improvements, QGIS, troubleshooting, load times. 
--- db/qgis-style/README.md | 2 +- docs/src/SUMMARY.md | 1 + docs/src/performance.md | 160 ++++++++++++++++++++++-------------- docs/src/qgis-styles.md | 30 ++----- docs/src/troubleshooting.md | 91 ++++++++++++++++++++ 5 files changed, 196 insertions(+), 88 deletions(-) create mode 100644 docs/src/troubleshooting.md diff --git a/db/qgis-style/README.md b/db/qgis-style/README.md index ec9d593..2e2d899 100644 --- a/db/qgis-style/README.md +++ b/db/qgis-style/README.md @@ -1 +1 @@ -Documentation moved to https://pgosm-flex.com +Documentation moved to https://pgosm-flex.com/qgis-styles.html diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index e44530f..e953e81 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -29,4 +29,5 @@ - [Build and Push Docker Images](./docker-build.md) - [Testing PgOSM Flex](./tests.md) - [Developing QGIS Styles](./qgis-styles-dev.md) +- [Troubleshoot errors in osm2pgsql processing](./troubleshooting.md) diff --git a/docs/src/performance.md b/docs/src/performance.md index e1c7fed..92d9ee9 100644 --- a/docs/src/performance.md +++ b/docs/src/performance.md @@ -1,83 +1,53 @@ # Processing Time -This page provides timings for how long PgOSM Flex runs for various region sizes. -The server used to host these tests has 8 vCPU and 64 GB RAM to match the target -server size [outlined in the osm2pgsql manual](https://osm2pgsql.org/doc/manual.html#preparing-the-database). +The purpose of this page is to provide a rough guideline of what to +expect for how long PgOSM Flex processing will take. +Two server sizes are used for this testing hosted by Digital Ocean. +The larger size server +has 8 vCPU and 64 GB RAM to match the target +server size [outlined in the osm2pgsql manual](https://osm2pgsql.org/doc/manual.html#preparing-the-database). The current matching Digital Ocean +resource class is the Memory-Optimized with dedicated CPU resources. +This comes with a 200 GB SSD. 
The cost for this class of instance +is $0.500 / hour, or $336 / month. A good number of production Postgres +instances can run on this hardware. +The smaller server size is a budget friendly 2 AMD vCPU and 2 GB RAM +on shared CPU resources. The cost for this class of instance is +$0.031 / hour, or $21 / month. -## Versions Tested - -Versions used for testing: PgOSM Flex 0.4.7 Docker image, based on the official -PostGIS image with Postgres 14 / PostGIS 3.2. - -Note: Postgres 15 [made GIST indexes faster](https://osm2pgsql.org/news/2023/01/22/faster-with-postgresql15.html) -to create. These timings will be updated in the future with the latest versions. - - -## Layerset: Minimal - -The `minimal` layer set only loads major roads, places, and POIs. - -Timings with nested admin polygons and dumping the processed data to a `.sql` -file. - - -| Sub-region | PBF Size | PostGIS Size | `.sql` Size | Import Time | -| :--- | :-: | :-: | :-: | :-: | -| District of Columbia | 18 MB | 36 MB | 14 MB | 15.3 sec | -| Colorado | 226 MB | 181 MB | 129 MB | 1 min 23 sec | -| Norway | 1.1 GB | 618 MB | 489 MB | 5 min 36 sec | -| North America | 12 GB | 9.5 GB | 7.7 GB | 3.03 hours | -Timings skipping nested admin polygons the dump to `.sql`. This adds -`--skip-dump --skip-nested` to the `docker exec process`. The following -table compares the import time using these skips against the full times reported -above. - - -| Sub-region | Import Time (full) | Import Time (skips) | -| :--- | :-: | :-: | -| District of Columbia | 15.3 sec | 15.0 sec | -| Colorado | 1 min 23 sec | 1 min 21 sec | -| Norway | 5 min 36 sec | 5 min 12 sec | -| North America | 3.03 hours | 1.25 hours | - +## Versions Tested -## Layerset: Default +Versions used for testing: PgOSM Flex 0.7.1 Docker image, based on the official +PostGIS image with Postgres 15.2 / PostGIS 3.3. osm2pgsql 1.8.1. -The `default` layer set.... -Timings with nested admin polygons and dumping the processed data to a `.sql` -file. 
+Note: Postgres 15 [made GIST indexes faster](https://osm2pgsql.org/news/2023/01/22/faster-with-postgresql15.html) +to create. Using a version prior to Postgres 14 will likely take longer. -| Sub-region | PBF Size | PostGIS Size | `.sql` Size | Import Time | -| :--- | :-: | :-: | :-: | :-: | -| District of Columbia | 18 MB | 212 MB | 160 MB | 53 sec | -| Colorado | 226 MB | 2.1 GB | 1.9 GB | 8 min 20 sec | -| Norway | 1.1 GB | 7.2 GB | 6.5 GB | 33 min 44 sec | -| North America | 12 GB | 98 GB | 55 GB | 8.78 hours | +## Methodology +Create instance, Ubuntu 22.04. -Timings skipping nested admin polygons the dump to `.sql`. This adds -`--skip-dump --skip-nested` to the `docker exec process`. The following -table compares the import time using these skips against the full times reported -above. +```bash +sudo apt update \ + && sudo apt upgrade -y \ + && sudo apt autoremove -y \ + && sudo apt install -y apt-transport-https ca-certificates curl software-properties-common \ + && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null \ + && sudo apt update \ + && sudo apt install docker-ce \ + && sudo reboot -h now +``` -| Sub-region | Import Time (full) | Import Time (skips) | -| :--- | :-: | :-: | -| District of Columbia | 53 sec | 51 sec | -| Colorado | 8 min 20 sec | 7 min 55 sec | -| Norway | 33 min 44 sec | 32 min 18 sec | -| North America | 8.78 hours | 6.58 hours | -## Methodology - The timing for the first `docker exec` for each region was discarded as it included the timing for downloading the PBF file. 
@@ -113,7 +83,7 @@ docker run --name pgosm -d --rm \ -v ~/pgosm-data:/app/output \ -v /etc/localtime:/etc/localtime:ro \ -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ - -p 5433:5432 -d rustprooflabs/pgosm-flex \ + -p 5433:5432 -d rustprooflabs/pgosm-flex:0.7.1 \ -c shared_buffers=1GB \ -c work_mem=50MB \ -c maintenance_work_mem=10GB \ @@ -145,3 +115,69 @@ time docker exec -it \ --layerset=minimal ``` + + + + +## Layerset: Minimal + +The `minimal` layer set only loads major roads, places, and POIs. + +Timings with nested admin polygons and dumping the processed data to a `.sql` +file. + + +| Sub-region | PBF Size | PostGIS Size | `.sql` Size | Import Time | +| :--- | :-: | :-: | :-: | :-: | +| District of Columbia | 18 MB | 36 MB | 14 MB | 15.3 sec | +| Colorado | 226 MB | 181 MB | 129 MB | 1 min 23 sec | +| Norway | 1.1 GB | 618 MB | 489 MB | 5 min 36 sec | +| North America | 12 GB | 9.5 GB | 7.7 GB | 3.03 hours | + + + +Timings skipping nested admin polygons and the dump to `.sql`. This adds +`--skip-dump --skip-nested` to the `docker exec process`. The following +table compares the import time using these skips against the full times reported +above. + + +| Sub-region | Import Time (full) | Import Time (skips) | +| :--- | :-: | :-: | +| District of Columbia | 15.3 sec | 15.0 sec | +| Colorado | 1 min 23 sec | 1 min 21 sec | +| Norway | 5 min 36 sec | 5 min 12 sec | +| North America | 3.03 hours | 1.25 hours | + + +## Layerset: Default + +The `default` layer set.... + +Timings with nested admin polygons and dumping the processed data to a `.sql` +file. + + +| Sub-region | PBF Size | PostGIS Size | `.sql` Size | Import Time | +| :--- | :-: | :-: | :-: | :-: | +| District of Columbia | 18 MB | 212 MB | 160 MB | 53 sec | +| Colorado | 226 MB | 2.1 GB | 1.9 GB | 8 min 20 sec | +| Norway | 1.1 GB | 7.2 GB | 6.5 GB | 33 min 44 sec | +| North America | 12 GB | 98 GB | 55 GB | 8.78 hours | + + + +Timings skipping nested admin polygons and the dump to `.sql`. 
This adds +`--skip-dump --skip-nested` to the `docker exec process`. The following +table compares the import time using these skips against the full times reported +above. + + +| Sub-region | Import Time (full) | Import Time (skips) | +| :--- | :-: | :-: | +| District of Columbia | 53 sec | 51 sec | +| Colorado | 8 min 20 sec | 7 min 55 sec | +| Norway | 33 min 44 sec | 32 min 18 sec | +| North America | 8.78 hours | 6.58 hours | + + diff --git a/docs/src/qgis-styles.md b/docs/src/qgis-styles.md index 279ec93..b555d17 100644 --- a/docs/src/qgis-styles.md +++ b/docs/src/qgis-styles.md @@ -3,38 +3,18 @@ If you use QGIS to visualize OpenStreetMap, there are a few basic styles using the `public.layer_styles` table created by QGIS. -This data is loaded by default and can be excluded with `--data-only`. - +This data is loaded by default. Run PgOSM Flex with `--data-only` to skip loading +this data. QGIS can save its styling information directly in a table in the Postgres database using a table `public.layer_styles`. -## Prepare - -The `create_layer_styles.sql` script creates the `public.layer_styles` table defined in QGIS 3.16 along with an additional `public.layer_styles_staging` table used to prepare -data before loading. - -```bash -psql -d pgosm -f create_layer_styles.sql -``` - -Load styles to staging. - -```bash -psql -d pgosm -f layer_styles.sql -``` - - -To use these styles as defaults, update the `f_table_catalog` and -`f_table_schema` values in the staging table. The defaults are -`f_table_catalog='pgosm'` and `f_table_schema='osm'`. 
- ```sql -UPDATE public.layer_styles_staging - SET f_table_catalog = 'your_db', - f_table_schema = 'osm' +SELECT f_table_catalog, f_table_schema, f_table_name, stylename, + useasdefault, description + FROM public.layer_styles ; ``` diff --git a/docs/src/troubleshooting.md b/docs/src/troubleshooting.md new file mode 100644 index 0000000..aee25bf --- /dev/null +++ b/docs/src/troubleshooting.md @@ -0,0 +1,91 @@ +# Troubleshoot errors in osm2pgsql processing + +This section contains rough notes about how to troubleshoot errors in PgOSM Flex. + +## Reduce `--ram` + +If you encounter an unusual failure during the `osm2pgsql` step of PgOSM Flex, +try reducing the `--ram` value. Choosing a `--ram` option too high can cause +the process to fail with a variety of unexpected errors. If that isn't the problem, +continue reading. + + +## Docker logs + +Output such as this. + +```bash +2023-02-26 22:14:31,760:INFO:pgosm-flex:helpers:Processing: Node(10k 10.0k/s) Way(0k 0.00k/s) Relation(0Processing: Node(84760k 277.9k/s) Way(0k 0.00k/s) Relation(0 0.0/s) +2023-02-26 22:14:31,774:ERROR:pgosm-flex:pgosm_flex:Failed to run osm2pgsql. Return code: -9 +Failed to run osm2pgsql. Return code: -9 - Check the log output for details +``` + + +Checking logs from Docker might shed light on the issue. 
+ +```bash +docker logs pgosm +``` + +``` +2023-02-26 22:14:31.777 UTC [114] LOG: incomplete message from client +2023-02-26 22:14:31.777 UTC [114] CONTEXT: COPY tags, line 1 +2023-02-26 22:14:31.777 UTC [114] STATEMENT: COPY "osm"."tags" ("geom_type","osm_id","tags") FROM STDIN +2023-02-26 22:14:31.807 UTC [114] ERROR: unexpected EOF on client connection with an open transaction +2023-02-26 22:14:31.807 UTC [114] CONTEXT: COPY tags, line 1 +2023-02-26 22:14:31.807 UTC [114] STATEMENT: COPY "osm"."tags" ("geom_type","osm_id","tags") FROM STDIN +2023-02-26 22:14:31.812 UTC [114] FATAL: terminating connection because protocol synchronization was lost +2023-02-26 22:14:31.822 UTC [114] LOG: could not send data to client: Broken pipe +``` + +## Troubleshoot within Docker + +Enter the docker container into `/bin/bash`. + +```bash +docker exec -it pgosm /bin/bash +``` + +Set environment variables required for PgOSM Flex's operation. + +```bash +export PGOSM_CONN=postgresql://postgres:mysecretpassword@localhost:5432/pgosm?application_name=pgosm-flex +export PGOSM_REPLICATION=False +export PGOSM_IMPORT_UUID=this-is-not-a-real-uuid +export PGOSM_LAYERSET=minimal +``` + +Run `osm2pgsql` manually. Start with a simple operation shown below, +consider adding `-v` and/or `--log-sql-data` to the `osm2pgsql` command +to dig deeper. 
+ +```bash +osm2pgsql -d $PGOSM_CONN \ + --create --output=flex --style=./run.lua \ + /app/output/district-of-columbia-latest.osm.pbf +``` + +## Configure more things + +```bash +docker run --name pgosm -d --rm \ + -v ~/pgosm-data:/app/output \ + -v /etc/localtime:/etc/localtime:ro \ + -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ + -p 5433:5432 -d rustprooflabs/pgosm-flex:0.7.1 \ + -c shared_buffers=1GB \ + -c work_mem=50MB \ + -c maintenance_work_mem=10GB \ + -c autovacuum_work_mem=2GB \ + -c checkpoint_timeout=300min \ + -c max_wal_senders=0 -c wal_level=minimal \ + -c max_wal_size=10GB \ + -c checkpoint_completion_target=0.9 \ + -c random_page_cost=1.0 \ + -c full_page_writes=off \ + -c fsync=off \ + -c log_statement=all \ + -c log_duration=on +``` + +