From 0337f95be3ba0fd38a5658cadc5f8366352b5183 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 7 Aug 2023 11:57:29 +0200 Subject: [PATCH 1/9] replication: remove code duplication --- scripts/osm2pgsql-replication | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index de136b65f..e972a1909 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -127,7 +127,7 @@ def compute_database_date(conn, schema, prefix): LOG.debug("Found timestamp %s", date) try: - date = dt.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=dt.timezone.utc) + date = dt.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ') except ValueError: LOG.fatal("Cannot parse timestamp '%s'", date) return None From 6b489263004ea18b3b75ba9a579706f4847b4a56 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 9 Aug 2023 14:48:52 +0200 Subject: [PATCH 2/9] osm2pgsql-replication: add support for new properties table --- scripts/osm2pgsql-replication | 476 +++++++++++------- tests/bdd/command-line/replication.feature | 80 ++- .../command-line/replication_legacy.feature | 68 +++ tests/bdd/environment.py | 1 + tests/bdd/steps/steps_db.py | 19 +- tests/bdd/steps/steps_execute.py | 25 +- 6 files changed, 467 insertions(+), 202 deletions(-) create mode 100644 tests/bdd/command-line/replication_legacy.feature diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index e972a1909..86a6becab 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -62,110 +62,280 @@ def pretty_format_timedelta(seconds): output = [] # If weeks > 1 but hours == 0, we still want to show "0 hours" if weeks > 0: - output.append("{} week(s)".format(weeks)) + output.append("{:d} week(s)".format(weeks)) if days > 0 or weeks > 0: - output.append("{} day(s)".format(days)) + output.append("{:d} day(s)".format(days)) if hours > 0 or days > 0 or weeks > 0: - output.append("{} 
hour(s)".format(hours)) + output.append("{:d} hour(s)".format(hours)) if minutes > 0 or hours > 0 or days > 0 or weeks > 0: - output.append("{} minute(s)".format(minutes)) + output.append("{:d} minute(s)".format(minutes)) - output.append("{} second(s)".format(seconds)) + output.append("{:d} second(s)".format(seconds)) output = " ".join(output) return output -def connect(args): - """ Create a connection from the given command line arguments. - """ - # If dbname looks like a conninfo string use it as such - if args.database and any(part in args.database for part in ['=', '://']): - return psycopg.connect(args.database, fallback_application_name="osm2pgsql-replication") - return psycopg.connect(dbname=args.database, user=args.username, - host=args.host, port=args.port, fallback_application_name="osm2pgsql-replication") +def osm_date(date): + return date.strftime('%Y-%m-%dT%H:%M:%SZ') -def table_exists(conn, table_name, schema_name=None): - with conn.cursor() as cur: - if schema_name is not None: - cur.execute('SELECT * FROM pg_tables where tablename = %s and schemaname = %s ', (table_name, schema_name)) - else: - cur.execute('SELECT * FROM pg_tables where tablename = %s', (table_name, )) - return cur.rowcount > 0 +def from_osm_date(datestr): + return dt.datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=dt.timezone.utc) -def compute_database_date(conn, schema, prefix): - """ Determine the date of the database from the newest object in the - database. - """ - # First, find the way with the highest ID in the database - # Using nodes would be more reliable but those are not cached by osm2pgsql. 
- with conn.cursor() as cur: - table = sql.Identifier(schema, f'{prefix}_ways') - cur.execute(sql.SQL("SELECT max(id) FROM {}").format(table)) - osmid = cur.fetchone()[0] if cur.rowcount == 1 else None - - if osmid is None: - LOG.fatal("No data found in the database.") - return None - - LOG.debug("Using way id %d for timestamp lookup", osmid) - # Get the way from the API to find the timestamp when it was created. - url = 'https://www.openstreetmap.org/api/0.6/way/{}/1'.format(osmid) - headers = {"User-Agent" : "osm2pgsql-update", - "Accept" : "application/json"} - with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: - data = json.loads(response.read().decode('utf-8')) - - if not data.get('elements') or not 'timestamp' in data['elements'][0]: - LOG.fatal("The way data downloaded from the API does not contain valid data.\n" - "URL used: %s", url) - return None - - date = data['elements'][0]['timestamp'] - LOG.debug("Found timestamp %s", date) +def start_point(param): + if param.isdigit(): + return int(param) - try: - date = dt.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ') - except ValueError: - LOG.fatal("Cannot parse timestamp '%s'", date) - return None + if sys.version_info >= (3, 7): + try: + date = dt.datetime.fromisoformat(param) + if date.tzinfo is None: + date = date.replace(tzinfo=dt.timezone.utc) + return date + except ValueError: + pass - return date.replace(tzinfo=dt.timezone.utc) + return from_osm_date(param) -def setup_replication_state(conn, table, base_url, seq, date): - """ (Re)create the table for the replication state and fill it with - the given state. 
- """ - with conn.cursor() as cur: - cur.execute(sql.SQL('DROP TABLE IF EXISTS {}').format(table)) - cur.execute(sql.SQL("""CREATE TABLE {} - (url TEXT, - sequence INTEGER, - importdate TIMESTAMP WITH TIME ZONE) - """).format(table)) - cur.execute(sql.SQL('INSERT INTO {} VALUES(%s, %s, %s)').format(table), - (base_url, seq, date)) - conn.commit() - - -def update_replication_state(conn, table, seq, date): - """ Update sequence and date in the replication state table. - The table is assumed to exist. - """ - with conn.cursor() as cur: - if date is not None: - cur.execute(sql.SQL('UPDATE {} SET sequence=%s, importdate=%s').format(table), - (seq, date)) +class DBError(Exception): + + def __init__(self, errno, msg): + self.errno = errno + self.msg = msg + + +class DBConnection: + + def __init__(self, args): + self.schema = args.middle_schema + + # If dbname looks like a conninfo string use it as such + if args.database and any(part in args.database for part in ['=', '://']): + self.conn = psycopg.connect(args.database, + fallback_application_name="osm2pgsql-replication") + + self.conn = psycopg.connect(dbname=args.database, user=args.username, + host=args.host, port=args.port, + fallback_application_name="osm2pgsql-replication") + + self.name = self.conn.get_dsn_parameters()['dbname'] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + if self.conn is not None: + self.conn.close() + + def table_exists(self, table_name): + with self.conn.cursor() as cur: + cur.execute('SELECT * FROM pg_tables where tablename = %s and schemaname = %s ', + (table_name, self.schema)) + return cur.rowcount > 0 + + def table_id(self, name): + return sql.Identifier(self.schema, name) + + +class Osm2pgsqlProperties: + PROP_TABLE_NAME = 'osm2pgsql_properties' + + def __init__(self, db): + self.db = db + self.is_updatable = self._get_prop('updatable') == 'true' + + def _get_prop(self, name): + with self.db.conn.cursor() as cur: + 
cur.execute(sql.SQL("SELECT value FROM {} WHERE property = %s") + .format(self.db.table_id(self.PROP_TABLE_NAME)), + (name, )) + return cur.fetchone()[0] if cur.rowcount == 1 else None + + def _set_prop(self, name, value): + with self.db.conn.cursor() as cur: + cur.execute(sql.SQL("""INSERT INTO {} (property, value) VALUES (%s, %s) + ON CONFLICT (property) + DO UPDATE SET value = EXCLUDED.value""") + .format(self.db.table_id(self.PROP_TABLE_NAME)), + (name, value)) + + def get_replication_base(self, server, start_at): + seq, date = None, None + if server is None: + server = self._get_prop('replication_base_url') + if server: + seq = self._get_prop('replication_sequence_number') + date = self._get_prop('replication_timestamp') + if date is not None: + date = from_osm_date(date) + else: + server = 'https://planet.openstreetmap.org/replication/minute' + + if isinstance(start_at, dt.datetime): + return server, None, start_at + + if seq is None or isinstance(start_at, int): + date = self._get_prop('current_timestamp') + if date is None: + LOG.Fatal("Cannot get timestamp from database. " + "Use --start-at to set an explicit date.") + return None, None, None + + date = from_osm_date(date) + if start_at is not None: + date -= dt.timedelta(minutes=start_at) + seq = None else: - cur.execute(sql.SQL('UPDATE {} SET sequence=%s').format(table), - (seq,)) + seq = int(seq) + + return server, seq, date + + def get_replication_state(self): + if not self.db.table_exists(self.PROP_TABLE_NAME): + raise DBError(1, "Cannot find replication status table. Run 'osm2pgsql-replication init' first.") + + base_url = self._get_prop('replication_base_url') + seq = self._get_prop('replication_sequence_number') + date = self._get_prop('replication_timestamp') + + if base_url is None or seq is None or date is None: + raise DBError(2, "Updates not set up correctly. 
Run 'osm2pgsql-updates init' first.") + + return base_url, int(seq), from_osm_date(date) + + def write_replication_state(self, base_url, seq, date): + self._set_prop('replication_base_url', base_url) + self._set_prop('replication_sequence_number', seq) + self._set_prop('replication_timestamp', osm_date(date)) + self.db.conn.commit() + + +class LegacyProperties: + + def __init__(self, db, prefix): + self.db = db + self.prop_table = f'{prefix}_replication_status' + self.way_table = f'{prefix}_ways' + self.is_updatable = db.table_exists(self.way_table) + + def get_replication_base(self, server, start_at): + """ Determine the date of the database from the newest object in the + database. + """ + if server is None: + server = 'https://planet.openstreetmap.org/replication/minute' - conn.commit() + if isinstance(start_at, dt.datetime): + return server, None, start_at -def status(conn, args): + # First, find the way with the highest ID in the database + # Using nodes would be more reliable but those are not cached by osm2pgsql. + with self.db.conn.cursor() as cur: + cur.execute(sql.SQL("SELECT max(id) FROM {}") + .format(self.db.table_id(self.way_table))) + osmid = cur.fetchone()[0] if cur.rowcount == 1 else None + + if osmid is None: + LOG.fatal("No data found in the database.") + return None, None, None + + LOG.debug("Using way id %d for timestamp lookup", osmid) + # Get the way from the API to find the timestamp when it was created. 
+ url = 'https://www.openstreetmap.org/api/0.6/way/{}/1'.format(osmid) + headers = {"User-Agent" : "osm2pgsql-update", + "Accept" : "application/json"} + with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: + data = json.loads(response.read().decode('utf-8')) + + if not data.get('elements') or not 'timestamp' in data['elements'][0]: + LOG.fatal("The way data downloaded from the API does not contain valid data.\n" + "URL used: %s", url) + return None, None, None + + date = data['elements'][0]['timestamp'] + LOG.debug("Found timestamp %s", date) + + try: + date = from_osm_date(date) + except ValueError: + LOG.fatal("Cannot parse timestamp '%s'", date) + return None, None, None + + if isinstance(start_at, int): + date -= dt.timedelta(minutes=start_at) + + return server, None, date + + def get_replication_state(self): + if not self.db.table_exists(self.prop_table): + raise DBError(1, "Cannot find replication status table. Run 'osm2pgsql-replication init' first.") + + with self.db.conn.cursor() as cur: + cur.execute(sql.SQL('SELECT * FROM {}').format(self.db.table_id(self.prop_table))) + if cur.rowcount != 1: + raise DBError(2, "Updates not set up correctly. Run 'osm2pgsql-updates init' first.") + + base_url, seq, date = cur.fetchone() + + if base_url is None or seq is None or date is None: + raise DBError(2, "Updates not set up correctly. 
Run 'osm2pgsql-updates init' first.") + + return base_url, seq, date + + def write_replication_state(self, base_url, seq, date): + table = self.db.table_id(self.prop_table) + with self.db.conn.cursor() as cur: + if not self.db.table_exists(self.prop_table): + cur.execute(sql.SQL("""CREATE TABLE {} + (url TEXT, + sequence INTEGER, + importdate TIMESTAMP WITH TIME ZONE) + """).format(table)) + cur.execute(sql.SQL('TRUNCATE {}').format(table)) + if date: + cur.execute(sql.SQL('INSERT INTO {} VALUES(%s, %s, %s)').format(table), + (base_url, seq, date)) + else: + cur.execute(sql.SQL('INSERT INTO {} VALUES(%s, %s)').format(table), + (base_url, seq)) + self.db.conn.commit() + + +def get_status_info(props, args): + results = {'status': 0} + + base_url, db_seq, db_ts = props.get_replication_state() + + db_ts = db_ts.astimezone(dt.timezone.utc) + results['server'] = {'base_url': base_url} + results['local'] = {'sequence': db_seq, 'timestamp': osm_date(db_ts)} + + repl = ReplicationServer(base_url) + state_info = repl.get_state_info() + if state_info is None: + # PyOsmium was unable to download the state information + results['status'] = 3 + results['error'] = "Unable to download the state information from {}".format(base_url) + else: + results['status'] = 0 + now = dt.datetime.now(dt.timezone.utc) + + server_seq, server_ts = state_info + server_ts = server_ts.astimezone(dt.timezone.utc) + + results['server']['sequence'] = server_seq + results['server']['timestamp'] = osm_date(server_ts) + results['server']['age_sec'] = int((now-server_ts).total_seconds()) + + results['local']['age_sec'] = int((now - db_ts).total_seconds()) + + return results + + +def status(props, args): """\ Print information about the current replication status, optionally as JSON. @@ -206,47 +376,10 @@ def status(conn, args): `local` is the status of your server. 
""" - - results = {} - - if not table_exists(conn, args.table_name, args.middle_schema): - results['status'] = 1 - results['error'] = "Cannot find replication status table. Run 'osm2pgsql-replication init' first." - else: - with conn.cursor() as cur: - cur.execute(sql.SQL('SELECT * FROM {}').format(args.table)) - if cur.rowcount != 1: - results['status'] = 2 - results['error'] = "Updates not set up correctly. Run 'osm2pgsql-updates init' first." - else: - - base_url, db_seq, db_ts = cur.fetchone() - db_ts = db_ts.astimezone(dt.timezone.utc) - results['server'] = {} - results['local'] = {} - results['server']['base_url'] = base_url - results['local']['sequence'] = db_seq - results['local']['timestamp'] = db_ts.strftime("%Y-%m-%dT%H:%M:%SZ") - - - repl = ReplicationServer(base_url) - state_info = repl.get_state_info() - if state_info is None: - # PyOsmium was unable to download the state information - results['status'] = 3 - results['error'] = "Unable to download the state information from {}".format(base_url) - else: - results['status'] = 0 - now = dt.datetime.now(dt.timezone.utc) - - server_seq, server_ts = state_info - server_ts = server_ts.astimezone(dt.timezone.utc) - - results['server']['sequence'] = server_seq - results['server']['timestamp'] = server_ts.strftime("%Y-%m-%dT%H:%M:%SZ") - results['server']['age_sec'] = int((now-server_ts).total_seconds()) - - results['local']['age_sec'] = int((now - db_ts).total_seconds()) + try: + results = get_status_info(props, args) + except DBError as err: + results = {'status': err.errno, 'error': err.msg} if args.json: print(json.dumps(results)) @@ -268,11 +401,10 @@ def status(conn, args): print("Local database's most recent data is {} old".format(pretty_format_timedelta(results['local']['age_sec']))) - return results['status'] -def init(conn, args): +def init(props, args): """\ Initialise the replication process. @@ -290,17 +422,13 @@ def init(conn, args): this with the `--server` parameter. 
""" if args.osm_file is None: - date = compute_database_date(conn, args.middle_schema, args.prefix) - if date is None: + base_url, seq, date = props.get_replication_base(args.server, args.start_at) + if base_url is None: return 1 - - date = date - dt.timedelta(hours=3) - base_url = args.server - seq = None else: base_url, seq, date = get_replication_header(args.osm_file) if base_url is None or (seq is None and date is None): - LOG.fatal("File '%s' has no usable replication headers. Use '--server' instead.", args.osm_file ) + LOG.fatal("File '%s' has no usable replication headers. Use '--server' instead.", args.osm_file) return 1 repl = ReplicationServer(base_url) @@ -319,16 +447,17 @@ def init(conn, args): LOG.fatal("Cannot reach the configured replication service '%s'.\n" "Does the URL point to a directory containing OSM update data?", base_url) + date = from_osm_date(state.timestamp) - setup_replication_state(conn, args.table, base_url, seq, date) + props.write_replication_state(base_url, seq, date) LOG.info("Initialised updates for service '%s'.", base_url) - LOG.info("Starting at sequence %d (%s).", seq, - date.astimezone(dt.timezone.utc).strftime('%Y-%m-%dT%H:%MZ')) + LOG.info("Starting at sequence %d (%s).", seq, osm_date(date)) return 0 -def update(conn, args): + +def update(props, args): """\ Download newly available data and apply it to the database. @@ -354,21 +483,15 @@ def update(conn, args): may be missing in the rare case that the replication service stops responding after the updates have been downloaded. """ - if not table_exists(conn, args.table_name, args.middle_schema): - LOG.fatal("Cannot find replication status table. " - "Run 'osm2pgsql-replication init' first.") + try: + base_url, seq, ts = props.get_replication_state() + except DBError as err: + LOG.fatal(err.msg) return 1 - with conn.cursor() as cur: - cur.execute(sql.SQL('SELECT * FROM {}').format(args.table)) - if cur.rowcount != 1: - LOG.fatal("Updates not set up correctly. 
Run 'osm2pgsql-updates init' first.") - return 1 - - base_url, seq, ts = cur.fetchone() - initial_local_timestamp = ts - LOG.info("Using replication service '%s'.", base_url) - local_db_age_sec = int((dt.datetime.now(dt.timezone.utc) - ts).total_seconds()) + initial_local_timestamp = ts + LOG.info("Using replication service '%s'.", base_url) + local_db_age_sec = int((dt.datetime.now(dt.timezone.utc) - ts).total_seconds()) repl = ReplicationServer(base_url) current = repl.get_state_info() @@ -387,7 +510,8 @@ def update(conn, args): current.sequence - seq, current.sequence, seq, pretty_format_timedelta(int((current.timestamp - ts).total_seconds())), int((current.timestamp - ts).total_seconds()), - ts.astimezone(dt.timezone.utc).strftime('%Y-%m-%dT%H:%MZ'), current.timestamp.astimezone(dt.timezone.utc).strftime('%Y-%m-%dT%H:%MZ') + osm_date(ts.astimezone(dt.timezone.utc)), + osm_date(current.timestamp.astimezone(dt.timezone.utc)) ) update_started = dt.datetime.now(dt.timezone.utc) @@ -437,28 +561,23 @@ def update(conn, args): LOG.debug('Calling post-processing script: %s', ' '.join(cmd)) subprocess.run(cmd, check=True) - update_replication_state(conn, args.table, seq, - nextstate.timestamp if nextstate else None) + props.write_replication_state(base_url, seq, nextstate.timestamp if nextstate else None) if nextstate is not None: LOG.info("Data imported until %s. Backlog remaining: %s", - nextstate.timestamp.astimezone(dt.timezone.utc).strftime('%Y-%m-%dT%H:%MZ'), + osm_date(nextstate.timestamp.astimezone(dt.timezone.utc)), pretty_format_timedelta((dt.datetime.now(dt.timezone.utc) - nextstate.timestamp).total_seconds()), ) - if args.once: break - update_duration_sec = (dt.datetime.now(dt.timezone.utc) - update_started).total_seconds() - with conn.cursor() as cur: - cur.execute(sql.SQL('SELECT * FROM {}').format(args.table)) - if cur.rowcount != 1: - LOG.fatal("Updates not set up correctly. 
Run 'osm2pgsql-updates init' first.") - return 1 - - _base_url, _seq, current_local_timestamp = cur.fetchone() + try: + _base_url, _seq, current_local_timestamp = props.get_replication_state() + except DBError as err: + LOG.fatal(err.msg) + return 1 total_applied_changes_duration_sec = (current_local_timestamp - initial_local_timestamp).total_seconds() LOG.debug("It took %s (%d sec) to apply %s (%d sec) of changes. This is a speed of ×%.1f.", @@ -469,6 +588,7 @@ def update(conn, args): return 0 + def get_parser(): parser = ArgumentParser(description=__doc__, prog='osm2pgsql-replication', @@ -514,8 +634,13 @@ def get_parser(): srcgrp.add_argument('--osm-file', metavar='FILE', help='Get replication information from the given file.') srcgrp.add_argument('--server', metavar='URL', - default='https://planet.openstreetmap.org/replication/minute', help='Use replication server at the given URL (default: %(default)s)') + grp.add_argument('--start-at', metavar='TIME', type=start_point, + help='Time when to start replication. When an absolute timestamp ' + '(in ISO format) is given, it will be used. 
If a number ' + 'is given, then replication starts the number of minutes ' + 'before the known date of the database.') + cmd.set_defaults(handler=init) # Arguments for update @@ -549,9 +674,9 @@ def get_parser(): cmd.add_argument('--json', action="store_true", default=False, help="Output status as json.") cmd.set_defaults(handler=status) - return parser + def main(): parser = get_parser() args = parser.parse_args() @@ -574,21 +699,18 @@ def main(): datefmt='%Y-%m-%d %H:%M:%S', level=max(4 - args.verbose, 1) * 10) - args.table_name = f'{args.prefix}_replication_status' - args.table = sql.Identifier(args.middle_schema, args.table_name) - - conn = connect(args) + with DBConnection(args) as db: + if db.table_exists(Osm2pgsqlProperties.PROP_TABLE_NAME): + props = Osm2pgsqlProperties(db) + else: + props = LegacyProperties(db, args.prefix) - try: - if not table_exists(conn, f'{args.prefix}_ways'): - dbname = conn.get_dsn_parameters()['dbname'] # args.database is None when not specified - LOG.fatal(f'osm2pgsql middle table "{args.prefix}_ways" not found in database "{dbname}". ' + if not props.is_updatable: + LOG.fatal(f'osm2pgsql middle table "{args.prefix}_ways" not found in database "{db.name}". 
' 'Database needs to be imported in --slim mode.') return 1 - return args.handler(conn, args) - finally: - conn.close() + return args.handler(props, args) if __name__ == '__main__': diff --git a/tests/bdd/command-line/replication.feature b/tests/bdd/command-line/replication.feature index cb61a1f28..71592d0fd 100644 --- a/tests/bdd/command-line/replication.feature +++ b/tests/bdd/command-line/replication.feature @@ -1,7 +1,6 @@ -Feature: Tests for the osm2pgsql-replication script - - Scenario: Replication can be initialised +Feature: Tests for the osm2pgsql-replication script with property table + Scenario: Replication can be initialised with a osm file after import Given the OSM data """ n34 Tamenity=restaurant x77 y45.3 @@ -12,21 +11,80 @@ Feature: Tests for the osm2pgsql-replication script And running osm2pgsql-replication | init | --osm-file={TEST_DATA_DIR}/liechtenstein-2013-08-03.osm.pbf | - Then table planet_osm_replication_status has 1 row + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 9999999 | + | replication_timestamp | 2013-08-03T19:00:02Z | - Scenario: Replication can be initialised in different schema - Given the database schema foobar - And the OSM data + Scenario: Replication will be initialised from the information of the import file + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 9999999 | + | replication_timestamp | 2013-08-03T19:00:02Z | + + + Scenario: Replication cannot be initialised when date information is missing + Given the OSM data """ n34 Tamenity=restaurant x77 y45.3 """ When running osm2pgsql pgsql with parameters | 
--slim | - And running osm2pgsql-replication + Then running osm2pgsql-replication fails with returncode 1 + | init | + And the error output contains + """ + Cannot get timestamp from database. + """ + + Scenario: Replication cannot be initialised on non-updatable database + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + When running osm2pgsql pgsql + + Then running osm2pgsql-replication fails with returncode 1 | init | - | --osm-file={TEST_DATA_DIR}/liechtenstein-2013-08-03.osm.pbf | - | --middle-schema=foobar | + And the error output contains + """ + Database needs to be imported in --slim mode. + """ + + Scenario: Replication can be initialised for a database in a different schema + Given the database schema foobar + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + When running osm2pgsql pgsql with parameters + | --slim | --middle-schema=foobar | + + And running osm2pgsql-replication + | init | --middle-schema=foobar | + + Then table foobar.osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 9999999 | + | replication_timestamp | 2013-08-03T19:00:02Z | + + + Scenario: Replication initialisation will fail for a database in a different schema + Given the database schema foobar + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + When running osm2pgsql pgsql with parameters + | --slim | + + Then running osm2pgsql-replication fails with returncode 1 + | init | --middle-schema=foobar | + And the error output contains + """ + Database needs to be imported in --slim mode. 
+ """ - Then table foobar.planet_osm_replication_status has 1 row diff --git a/tests/bdd/command-line/replication_legacy.feature b/tests/bdd/command-line/replication_legacy.feature new file mode 100644 index 000000000..93126275d --- /dev/null +++ b/tests/bdd/command-line/replication_legacy.feature @@ -0,0 +1,68 @@ +Feature: Tests for the osm2pgsql-replication script without property table + + Background: + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 + n35 x77 y45.31 + w4 Thighway=residential Nn34,n35 + """ + + Scenario: Replication can be initialised with a osm file + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + And running osm2pgsql-replication + | init | --osm-file={TEST_DATA_DIR}/liechtenstein-2013-08-03.osm.pbf | + + Then table planet_osm_replication_status contains exactly + | url | sequence | importdate at time zone 'UTC' | + | http://example.com/europe/liechtenstein-updates | 9999999 | 2013-08-03 19:00:02 | + + + Scenario: Replication cannot be initialised from a osm file without replication info + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + Then running osm2pgsql-replication fails with returncode 1 + | init | --osm-file={TEST_DATA_DIR}/008-ch.osc.gz | + And the error output contains + """ + has no usable replication headers + """ + + + Scenario: Replication can be initialised in different schema + Given the database schema foobar + When running osm2pgsql pgsql with parameters + | --slim | --middle-schema=foobar | + + And deleting table foobar.osm2pgsql_properties + + And running osm2pgsql-replication + | init | + | --osm-file={TEST_DATA_DIR}/liechtenstein-2013-08-03.osm.pbf | + | --middle-schema=foobar | + + Then table foobar.planet_osm_replication_status contains exactly + | url | sequence | importdate at time zone 'UTC' | + | http://example.com/europe/liechtenstein-updates | 9999999 | 2013-08-03 19:00:02 | + + + Scenario: 
Replication must be initialised in the same schema as rest of middle + Given the database schema foobar + When running osm2pgsql pgsql with parameters + | --slim | --middle-schema=foobar | + + And deleting table foobar.osm2pgsql_properties + + Then running osm2pgsql-replication fails with returncode 1 + | init | + | --osm-file={TEST_DATA_DIR}/liechtenstein-2013-08-03.osm.pbf | + And the error output contains + """ + Database needs to be imported in --slim mode. + """ + diff --git a/tests/bdd/environment.py b/tests/bdd/environment.py index f37b77d28..7bbf501e5 100644 --- a/tests/bdd/environment.py +++ b/tests/bdd/environment.py @@ -130,6 +130,7 @@ def working_directory(context, **kwargs): with tempfile.TemporaryDirectory() as tmpdir: yield Path(tmpdir) + def before_tag(context, tag): if tag == 'needs-pg-index-includes': if context.config.userdata['PG_VERSION'] < 110000: diff --git a/tests/bdd/steps/steps_db.py b/tests/bdd/steps/steps_db.py index 9b46a13a2..abe75c52c 100644 --- a/tests/bdd/steps/steps_db.py +++ b/tests/bdd/steps/steps_db.py @@ -18,16 +18,18 @@ def create_db_schema(context, schema): with context.db.cursor() as cur: cur.execute("CREATE SCHEMA " + schema) + +@when("deleting table (?P<table>.+)") +def delete_table(context, table): + with context.db.cursor() as cur: + cur.execute("DROP TABLE " + table) + + @then("table (?P<table>.+) has (?P<row_num>\d+) rows?(?P<has_where> with condition)?") def db_table_row_count(context, table, row_num, has_where): assert table_exists(context.db, table) - if '.' in table: - schema, tablename = table.split('.', 2) - query = sql.SQL("SELECT count(*) FROM {}.{}")\ - .format(sql.Identifier(schema), sql.Identifier(tablename)) - else: - query = sql.SQL("SELECT count(*) FROM {}").format(sql.Identifier(table)) + query = sql.SQL("SELECT count(*) FROM {}").format(sql.Identifier(*table.split('.', 2))) if has_where: query = sql.SQL("{} WHERE {}").format(query, sql.SQL(context.text)) @@ -43,7 +45,7 @@ def db_table_sum_up(context, table, formula, result, has_where): assert table_exists(context.db, table) query = sql.SQL("SELECT round(sum({})) FROM {}")\ - .format(sql.SQL(formula), sql.Identifier(table)) + .format(sql.SQL(formula), sql.Identifier(*table.split('.', 2))) if has_where: query = sql.SQL("{} WHERE {}").format(query, sql.SQL(context.text)) @@ -72,7 +74,8 @@ def db_check_table_content(context, table, exact): rows = sql.SQL(', '.join(h.rsplit('@')[0] for h in context.table.headings)) with context.db.cursor() as cur: - cur.execute(sql.SQL("SELECT {} FROM {}").format(rows, sql.Identifier(table))) + cur.execute(sql.SQL("SELECT {} FROM {}") + .format(rows, sql.Identifier(*table.split('.', 2)))) actuals = list(DBRow(r, context.table.headings, context.geometry_factory) for r in cur) diff --git a/tests/bdd/steps/steps_execute.py b/tests/bdd/steps/steps_execute.py index 432a447d1..d74bd3205 100644 --- a/tests/bdd/steps/steps_execute.py +++ b/tests/bdd/steps/steps_execute.py @@ -85,18 +85,20 @@ def run_osm2pgsql_replication(context): if '-d' not in cmdline and '--database' not in cmdline: cmdline.extend(('-d', context.config.userdata['TEST_DB'])) - + # on Windows execute script directly with python, because shebang is not recognised if os.name == 'nt': - cmdline.insert(0, "python") + cmdline.insert(0, "python") proc = subprocess.Popen(cmdline, cwd=str(context.workdir), 
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - _, errs = proc.communicate() + outdata = proc.communicate() + + context.osm2pgsql_outdata = [d.decode('utf-8').replace('\\n', '\n') for d in outdata] - return proc.returncode, errs.decode('utf-8') + return proc.returncode @given("no lua tagtransform") @@ -156,11 +158,22 @@ def execute_osm2pgsql_with_failure(context, output): @when("running osm2pgsql-replication") def execute_osm2pgsql_replication_successfully(context): - returncode, errs = run_osm2pgsql_replication(context) + returncode = run_osm2pgsql_replication(context) assert returncode == 0,\ f"osm2pgsql-replication failed with error code {returncode}.\n"\ - f"Errors:\n{errs}" + f"Output:\n{context.osm2pgsql_outdata[0]}\n{context.osm2pgsql_outdata[1]}\n" + + +@then("running osm2pgsql-replication fails(?: with returncode (?P<expected>\d+))?") +def execute_osm2pgsql_replication_successfully(context, expected): + returncode = run_osm2pgsql_replication(context) + + assert returncode != 0, "osm2pgsql-replication unexpectedly succeeded" + if expected: + assert returncode == int(expected), \ + f"osm2pgsql-replication failed with returncode {returncode} instead of {expected}."\ + f"Output:\n{context.osm2pgsql_outdata[0]}\n{context.osm2pgsql_outdata[1]}\n" @then("the (?P\w+) output contains") From deae7e003e194687d5ccce92cd7530606e9f2709 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 9 Aug 2023 15:05:48 +0200 Subject: [PATCH 3/9] osm2pgsql-replication: switch to proper use of exceptions --- scripts/osm2pgsql-replication | 58 +++++++++++++---------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index 86a6becab..f60eca7eb 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -180,9 +180,8 @@ class Osm2pgsqlProperties: if seq is None or isinstance(start_at, int): date = self._get_prop('current_timestamp') if date is None: - 
LOG.Fatal("Cannot get timestamp from database. " - "Use --start-at to set an explicit date.") - return None, None, None + raise DBError(1, "Cannot get timestamp from database. " + "Use --start-at to set an explicit date.") date = from_osm_date(date) if start_at is not None: @@ -239,8 +238,7 @@ class LegacyProperties: osmid = cur.fetchone()[0] if cur.rowcount == 1 else None if osmid is None: - LOG.fatal("No data found in the database.") - return None, None, None + raise DBError(1, "No data found in the database.") LOG.debug("Using way id %d for timestamp lookup", osmid) # Get the way from the API to find the timestamp when it was created. @@ -251,9 +249,8 @@ class LegacyProperties: data = json.loads(response.read().decode('utf-8')) if not data.get('elements') or not 'timestamp' in data['elements'][0]: - LOG.fatal("The way data downloaded from the API does not contain valid data.\n" - "URL used: %s", url) - return None, None, None + raise DBError(1, "The way data downloaded from the API does not contain valid data.\n" + f"URL used: {url}") date = data['elements'][0]['timestamp'] LOG.debug("Found timestamp %s", date) @@ -261,8 +258,7 @@ class LegacyProperties: try: date = from_osm_date(date) except ValueError: - LOG.fatal("Cannot parse timestamp '%s'", date) - return None, None, None + raise DBError(1, f"Cannot parse timestamp '{date}'") if isinstance(start_at, int): date -= dt.timedelta(minutes=start_at) @@ -423,30 +419,23 @@ def init(props, args): """ if args.osm_file is None: base_url, seq, date = props.get_replication_base(args.server, args.start_at) - if base_url is None: - return 1 else: base_url, seq, date = get_replication_header(args.osm_file) if base_url is None or (seq is None and date is None): - LOG.fatal("File '%s' has no usable replication headers. Use '--server' instead.", args.osm_file) - return 1 + raise DBError(1, f"File '{args.osm_file}' has no usable replication headers. 
Use '--server' instead.") repl = ReplicationServer(base_url) if seq is None: seq = repl.timestamp_to_sequence(date) - if seq is None: - LOG.fatal("Cannot reach the configured replication service '%s'.\n" - "Does the URL point to a directory containing OSM update data?", - base_url) - return 1 + raise DBError(1, f"Cannot reach the configured replication service '{base_url}'.\n" + "Does the URL point to a directory containing OSM update data?") if date is None: state = repl.get_state_info(seq) if state is None: - LOG.fatal("Cannot reach the configured replication service '%s'.\n" - "Does the URL point to a directory containing OSM update data?", - base_url) + raise DBError(1, f"Cannot reach the configured replication service '{base_url}'.\n" + "Does the URL point to a directory containing OSM update data?") date = from_osm_date(state.timestamp) props.write_replication_state(base_url, seq, date) @@ -483,11 +472,7 @@ def update(props, args): may be missing in the rare case that the replication service stops responding after the updates have been downloaded. 
""" - try: - base_url, seq, ts = props.get_replication_state() - except DBError as err: - LOG.fatal(err.msg) - return 1 + base_url, seq, ts = props.get_replication_state() initial_local_timestamp = ts LOG.info("Using replication service '%s'.", base_url) @@ -496,10 +481,8 @@ def update(props, args): repl = ReplicationServer(base_url) current = repl.get_state_info() if current is None: - LOG.fatal("Cannot reach the configured replication service '%s'.\n" - "Does the URL point to a directory containing OSM update data?", - base_url) - return 1 + raise DBError(1, f"Cannot reach the configured replication service '{base_url}'.\n" + "Does the URL point to a directory containing OSM update data?") if seq >= current.sequence: LOG.info("Database already up-to-date.") @@ -573,11 +556,7 @@ def update(props, args): break update_duration_sec = (dt.datetime.now(dt.timezone.utc) - update_started).total_seconds() - try: - _base_url, _seq, current_local_timestamp = props.get_replication_state() - except DBError as err: - LOG.fatal(err.msg) - return 1 + _base_url, _seq, current_local_timestamp = props.get_replication_state() total_applied_changes_duration_sec = (current_local_timestamp - initial_local_timestamp).total_seconds() LOG.debug("It took %s (%d sec) to apply %s (%d sec) of changes. This is a speed of ×%.1f.", @@ -710,7 +689,12 @@ def main(): 'Database needs to be imported in --slim mode.') return 1 - return args.handler(props, args) + try: + return args.handler(props, args) + except DBError as err: + LOG.fatal(err.msg) + + return 1 if __name__ == '__main__': From 9985d70825f2b0d7f97581769923a4a693577377 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 9 Aug 2023 17:27:45 +0200 Subject: [PATCH 4/9] run osm2pgsql-replication in BDD tests as a module This gives us the possibility to monkeypatch parts of the code. 
--- scripts/osm2pgsql-replication | 7 +++++-- tests/bdd/environment.py | 10 ++++++++++ tests/bdd/steps/steps_execute.py | 25 +++++++++++++------------ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index f60eca7eb..9d1c94484 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -656,9 +656,12 @@ def get_parser(): return parser -def main(): +def main(prog_args=None): parser = get_parser() - args = parser.parse_args() + try: + args = parser.parse_args(args=prog_args) + except SystemExit: + return 1 if missing_modules: LOG.fatal("Missing required Python libraries %(mods)s.\n\n" diff --git a/tests/bdd/environment.py b/tests/bdd/environment.py index 7bbf501e5..50e597992 100644 --- a/tests/bdd/environment.py +++ b/tests/bdd/environment.py @@ -8,6 +8,8 @@ from pathlib import Path import subprocess import tempfile +import importlib.util +from importlib.machinery import SourceFileLoader from behave import * import psycopg2 @@ -89,6 +91,14 @@ def before_all(context): context.test_data_dir = Path(context.config.userdata['TEST_DATA_DIR']).resolve() context.default_data_dir = Path(context.config.userdata['SRC_DIR']).resolve() + # Set up replication script. + replicationfile = str(Path(context.config.userdata['REPLICATION_SCRIPT']).resolve()) + spec = importlib.util.spec_from_loader('osm2pgsql_replication', + SourceFileLoader( 'osm2pgsql_replication',replicationfile)) + assert spec, f"File not found: {replicationfile}" + context.osm2pgsql_replication = importlib.util.module_from_spec(spec) + spec.loader.exec_module(context.osm2pgsql_replication) + def before_scenario(context, scenario): """ Set up a fresh, empty test database. 
diff --git a/tests/bdd/steps/steps_execute.py b/tests/bdd/steps/steps_execute.py index d74bd3205..593763287 100644 --- a/tests/bdd/steps/steps_execute.py +++ b/tests/bdd/steps/steps_execute.py @@ -9,8 +9,10 @@ """ from io import StringIO from pathlib import Path -import os +import sys import subprocess +import contextlib +import logging def get_import_file(context): if context.import_file is not None: @@ -76,7 +78,7 @@ def run_osm2pgsql(context, output): def run_osm2pgsql_replication(context): - cmdline = [str(Path(context.config.userdata['REPLICATION_SCRIPT']).resolve())] + cmdline = [] # convert table items to CLI arguments and inject constants to placeholders if context.table: cmdline.extend(f.format(**context.config.userdata) for f in context.table.headings if f) @@ -86,19 +88,18 @@ def run_osm2pgsql_replication(context): if '-d' not in cmdline and '--database' not in cmdline: cmdline.extend(('-d', context.config.userdata['TEST_DB'])) - # on Windows execute script directly with python, because shebang is not recognised - if os.name == 'nt': - cmdline.insert(0, "python") - proc = subprocess.Popen(cmdline, cwd=str(context.workdir), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + serr = StringIO() + log_handler = logging.StreamHandler(serr) + context.osm2pgsql_replication.LOG.addHandler(log_handler) + with contextlib.redirect_stdout(StringIO()) as sout: + retval = context.osm2pgsql_replication.main(cmdline) + context.osm2pgsql_replication.LOG.removeHandler(log_handler) - outdata = proc.communicate() + context.osm2pgsql_outdata = [sout.getvalue(), serr.getvalue()] + print(context.osm2pgsql_outdata) - context.osm2pgsql_outdata = [d.decode('utf-8').replace('\\n', '\n') for d in outdata] - - return proc.returncode + return retval @given("no lua tagtransform") From dbac88f43e9efac352206e8f41666dddd35bd86b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Aug 2023 09:23:14 +0200 Subject: [PATCH 5/9] add tests for 
osm2pgsql-replication Needs mocking of replication functions and url accesses. --- scripts/osm2pgsql-replication | 21 +- tests/bdd/command-line/replication.feature | 226 +++++++++++++++++- .../command-line/replication_legacy.feature | 174 ++++++++++++++ tests/bdd/environment.py | 12 + tests/bdd/steps/replication_server_mock.py | 53 ++++ tests/bdd/steps/steps_execute.py | 25 ++ 6 files changed, 500 insertions(+), 11 deletions(-) create mode 100644 tests/bdd/steps/replication_server_mock.py diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index 9d1c94484..531db2f7e 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -54,6 +54,7 @@ LOG = logging.getLogger() OSM2PGSQL_PATH = Path(__file__).parent.resolve() / 'osm2pgsql' def pretty_format_timedelta(seconds): + seconds = int(seconds) (minutes, seconds) = divmod(seconds, 60) (hours, minutes) = divmod(minutes, 60) (days, hours) = divmod(hours, 24) @@ -62,15 +63,15 @@ def pretty_format_timedelta(seconds): output = [] # If weeks > 1 but hours == 0, we still want to show "0 hours" if weeks > 0: - output.append("{:d} week(s)".format(weeks)) + output.append("{} week(s)".format(weeks)) if days > 0 or weeks > 0: - output.append("{:d} day(s)".format(days)) + output.append("{} day(s)".format(days)) if hours > 0 or days > 0 or weeks > 0: - output.append("{:d} hour(s)".format(hours)) + output.append("{} hour(s)".format(hours)) if minutes > 0 or hours > 0 or days > 0 or weeks > 0: - output.append("{:d} minute(s)".format(minutes)) + output.append("{} minute(s)".format(minutes)) - output.append("{:d} second(s)".format(seconds)) + output.append("{} second(s)".format(seconds)) output = " ".join(output) return output @@ -201,7 +202,7 @@ class Osm2pgsqlProperties: date = self._get_prop('replication_timestamp') if base_url is None or seq is None or date is None: - raise DBError(2, "Updates not set up correctly. 
Run 'osm2pgsql-updates init' first.") + raise DBError(2, "Updates not set up correctly. Run 'osm2pgsql-replication init' first.") return base_url, int(seq), from_osm_date(date) @@ -243,7 +244,7 @@ class LegacyProperties: LOG.debug("Using way id %d for timestamp lookup", osmid) # Get the way from the API to find the timestamp when it was created. url = 'https://www.openstreetmap.org/api/0.6/way/{}/1'.format(osmid) - headers = {"User-Agent" : "osm2pgsql-update", + headers = {"User-Agent" : "osm2pgsql-replication", "Accept" : "application/json"} with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: data = json.loads(response.read().decode('utf-8')) @@ -272,12 +273,12 @@ class LegacyProperties: with self.db.conn.cursor() as cur: cur.execute(sql.SQL('SELECT * FROM {}').format(self.db.table_id(self.prop_table))) if cur.rowcount != 1: - raise DBError(2, "Updates not set up correctly. Run 'osm2pgsql-updates init' first.") + raise DBError(2, "Updates not set up correctly. Run 'osm2pgsql-replication init' first.") base_url, seq, date = cur.fetchone() if base_url is None or seq is None or date is None: - raise DBError(2, "Updates not set up correctly. Run 'osm2pgsql-updates init' first.") + raise DBError(2, "Updates not set up correctly. 
Run 'osm2pgsql-replication init' first.") return base_url, seq, date @@ -491,7 +492,7 @@ def update(props, args): remote_server_age_sec = int((dt.datetime.now(dt.timezone.utc) - current.timestamp).total_seconds()) LOG.debug("Applying %d sequence(s) (%d → %d), covering %s (%s sec) of changes (%s → %s)", current.sequence - seq, current.sequence, seq, - pretty_format_timedelta(int((current.timestamp - ts).total_seconds())), + pretty_format_timedelta((current.timestamp - ts).total_seconds()), int((current.timestamp - ts).total_seconds()), osm_date(ts.astimezone(dt.timezone.utc)), osm_date(current.timestamp.astimezone(dt.timezone.utc)) diff --git a/tests/bdd/command-line/replication.feature b/tests/bdd/command-line/replication.feature index 71592d0fd..e2d36de5e 100644 --- a/tests/bdd/command-line/replication.feature +++ b/tests/bdd/command-line/replication.feature @@ -1,10 +1,11 @@ Feature: Tests for the osm2pgsql-replication script with property table - Scenario: Replication can be initialised with a osm file after import + Scenario: Replication can be initialised with an osm file after import Given the OSM data """ n34 Tamenity=restaurant x77 y45.3 """ + And the replication service at http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | @@ -20,6 +21,7 @@ Feature: Tests for the osm2pgsql-replication script with property table Scenario: Replication will be initialised from the information of the import file Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | @@ -62,6 +64,7 @@ Feature: Tests for the osm2pgsql-replication script with property table Scenario: Replication can be initialised for a database in a different schema Given the database schema foobar Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at 
http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | --middle-schema=foobar | @@ -78,6 +81,7 @@ Feature: Tests for the osm2pgsql-replication script with property table Scenario: Replication initialiasion will fail for a database in a different schema Given the database schema foobar Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | @@ -88,3 +92,223 @@ Feature: Tests for the osm2pgsql-replication script with property table Database needs to be imported in --slim mode. """ + Scenario: Replication can be initialised with a fixed date (no previous replication info) + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 + """ + And the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-04T02:00:00Z | + | 347 | 2020-10-04T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | --start-at | 2020-10-04T01:30:00Z | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 345 | + | replication_timestamp | 2020-10-04T01:30:00Z | + + + Scenario: Replication can be initialised with a fixed date (with previous replication info) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-04T02:00:00Z | + | 347 | 2020-10-04T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | --start-at | 2020-10-04T03:30:00Z | + + Then table osm2pgsql_properties contains + | property | 
value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 347 | + | replication_timestamp | 2020-10-04T03:30:00Z | + + + Scenario: Replication can be initialised with a rollback (no previous replication info) + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 t2020-10-04T02:00:01Z + """ + And the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-04T02:00:00Z | + | 347 | 2020-10-04T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | --start-at | 60 | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 345 | + | replication_timestamp | 2020-10-04T01:00:01Z | + + + Scenario: Replication can be initialised with a rollback (with previous replication info) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-04T02:00:00Z | + | 347 | 2020-10-04T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | --start-at | 60 | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 345 | + | replication_timestamp | 2013-08-03T14:55:30Z | + + + Scenario: Replication can be initialised from a different server + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at https://custom.replication + | sequence | timestamp | + | 1345 | 2013-07-01T01:00:00Z | + | 1346 | 2013-08-01T01:00:00Z | + | 1347 | 2013-09-01T01:00:00Z | + When running 
osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | init | --server | https://custom.replication | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://custom.replication | + | replication_sequence_number | 1346 | + | replication_timestamp | 2013-08-03T15:55:30Z | + + + Scenario: Updates need an initialised replication + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 + """ + And the replication service at https://planet.openstreetmap.org/replication/minute + When running osm2pgsql pgsql with parameters + | --slim | + + Then running osm2pgsql-replication fails with returncode 1 + | update | + And the error output contains + """ + Updates not set up correctly. + """ + + Scenario: Updates run until the end (exactly one application) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 9999999 | 2013-08-01T01:00:02Z | + | 10000000 | 2013-09-01T01:00:00Z | + | 10000001 | 2013-10-01T01:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | + And running osm2pgsql-replication + | update | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 10000001 | + | replication_timestamp | 2013-10-01T01:00:00Z | + + + Scenario: Updates run until the end (multiple applications) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 9999999 | 2013-08-01T01:00:02Z | + | 10000000 | 2013-09-01T01:00:00Z | + | 10000001 | 2013-10-01T01:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | + And running 
osm2pgsql-replication + | update | --max-diff-size | 1 | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 10000001 | + | replication_timestamp | 2013-10-01T01:00:00Z | + + + Scenario: Updates can run only once + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 9999999 | 2013-08-01T01:00:02Z | + | 10000000 | 2013-09-01T01:00:00Z | + | 10000001 | 2013-10-01T01:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | + And running osm2pgsql-replication + | update | --once | --max-diff-size | 1 | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | http://example.com/europe/liechtenstein-updates | + | replication_sequence_number | 10000000 | + | replication_timestamp | 2013-09-01T01:00:00Z | + + + Scenario: Status of an uninitialised database fails + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 + """ + And the replication service at https://planet.openstreetmap.org/replication/minute + When running osm2pgsql pgsql with parameters + | --slim | + + Then running osm2pgsql-replication fails with returncode 2 + | status | --json | + And the standard output contains + """ + "status": 2 + "error": "Updates not set up correctly. 
+ """ + + Scenario: Status of a freshly initialised database + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the replication service at http://example.com/europe/liechtenstein-updates + | sequence | timestamp | + | 9999999 | 2013-08-01T01:00:02Z | + | 10000000 | 2013-09-01T01:00:00Z | + | 10000001 | 2013-10-01T01:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication + | status | --json | + Then the standard output contains + """ + "status": 0 + "server": {"base_url": "http://example.com/europe/liechtenstein-updates", "sequence": 10000001, "timestamp": "2013-10-01T01:00:00Z" + "local": {"sequence": 9999999, "timestamp": "2013-08-03T19:00:02Z" + """ diff --git a/tests/bdd/command-line/replication_legacy.feature b/tests/bdd/command-line/replication_legacy.feature index 93126275d..d5734f41e 100644 --- a/tests/bdd/command-line/replication_legacy.feature +++ b/tests/bdd/command-line/replication_legacy.feature @@ -9,6 +9,7 @@ Feature: Tests for the osm2pgsql-replication script without property table """ Scenario: Replication can be initialised with a osm file + Given the replication service at http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | And deleting table osm2pgsql_properties @@ -22,6 +23,7 @@ Feature: Tests for the osm2pgsql-replication script without property table Scenario: Replication cannot be initialised from a osm file without replication info + Given the replication service at http://example.com/europe/liechtenstein-updates When running osm2pgsql pgsql with parameters | --slim | And deleting table osm2pgsql_properties @@ -35,6 +37,7 @@ Feature: Tests for the osm2pgsql-replication script without property table Scenario: Replication can be initialised in different schema + Given the replication service at http://example.com/europe/liechtenstein-updates Given the database schema foobar When running osm2pgsql pgsql with parameters | --slim | 
--middle-schema=foobar | @@ -52,6 +55,7 @@ Feature: Tests for the osm2pgsql-replication script without property table Scenario: Replication must be initialised in the same schema as rest of middle + Given the replication service at http://example.com/europe/liechtenstein-updates Given the database schema foobar When running osm2pgsql pgsql with parameters | --slim | --middle-schema=foobar | @@ -66,3 +70,173 @@ Feature: Tests for the osm2pgsql-replication script without property table Database needs to be imported in --slim mode. """ + Scenario: Replication can be initialised with a fixed start date + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + And running osm2pgsql-replication + | init | --start-at | 2020-10-22T04:05:06Z | + + Then table planet_osm_replication_status contains exactly + | url | sequence | importdate at time zone 'UTC' | + | https://planet.openstreetmap.org/replication/minute | 346 | 2020-10-22 04:05:06 | + + + Scenario: Replication can be initialised from the data in the database + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + And the URL https://www.openstreetmap.org/api/0.6/way/4/1 returns + """ + {"version":"0.6","generator":"OpenStreetMap server","copyright":"OpenStreetMap and contributors","attribution":"http://www.openstreetmap.org/copyright","license":"http://opendatacommons.org/licenses/odbl/1-0/","elements":[{"type":"way","id":4,"timestamp":"2020-10-15T12:21:53Z","version":1,"changeset":234165,"user":"ewg2","uid":2,"nodes":[34,35],"tags":{"highway":"residential"}}]} + """ + When running osm2pgsql pgsql with 
parameters + | --slim | + And deleting table osm2pgsql_properties + + And running osm2pgsql-replication + | init | + + Then table planet_osm_replication_status contains exactly + | url | sequence | importdate at time zone 'UTC' | + | https://planet.openstreetmap.org/replication/minute | 346 | 2020-10-15 12:21:53 | + + + Scenario: Replication can be initialised from the data in the database with rollback + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + And the URL https://www.openstreetmap.org/api/0.6/way/4/1 returns + """ + {"version":"0.6","generator":"OpenStreetMap server","copyright":"OpenStreetMap and contributors","attribution":"http://www.openstreetmap.org/copyright","license":"http://opendatacommons.org/licenses/odbl/1-0/","elements":[{"type":"way","id":4,"timestamp":"2020-10-15T12:21:53Z","version":1,"changeset":234165,"user":"ewg2","uid":2,"nodes":[34,35],"tags":{"highway":"residential"}}]} + """ + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + And running osm2pgsql-replication + | init | --start-at | 120 | + + Then table planet_osm_replication_status contains exactly + | url | sequence | importdate at time zone 'UTC' | + | https://planet.openstreetmap.org/replication/minute | 346 | 2020-10-15 10:21:53 | + + + Scenario: Updates need an initialised replication + Given the replication service at https://planet.openstreetmap.org/replication/minute + When running osm2pgsql pgsql with parameters + | --slim | + + Then running osm2pgsql-replication fails with returncode 1 + | update | + And the error output contains + """ + Updates not set up correctly. 
+ """ + + Scenario: Updates run until the end (exactly one application) + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | --start-at | 2020-10-04T01:05:06Z | + And running osm2pgsql-replication + | update | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 347 | + | replication_timestamp | 2020-10-24T03:00:00Z | + + + Scenario: Updates run until the end (multiple applications) + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | --start-at | 2020-10-04T01:05:06Z | + And running osm2pgsql-replication + | update | --max-diff-size | 1 | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 347 | + | replication_timestamp | 2020-10-24T03:00:00Z | + + + Scenario: Updates can run only once + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And running osm2pgsql-replication + | init | --start-at | 2020-10-04T01:05:06Z | + And running osm2pgsql-replication + | update | --max-diff-size | 1 | --once | + + Then table osm2pgsql_properties contains + | property | value | + | 
replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 346 | + | replication_timestamp | 2020-10-14T02:00:00Z | + + + + Scenario: Status of an uninitialised database fails + Given the replication service at https://planet.openstreetmap.org/replication/minute + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + Then running osm2pgsql-replication fails with returncode 1 + | status | --json | + And the standard output contains + """ + "status": 1 + "error": "Cannot find replication status table + """ + + Scenario: Status of a freshly initialised database + Given the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-14T02:00:00Z | + | 347 | 2020-10-24T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + And deleting table osm2pgsql_properties + + And running osm2pgsql-replication + | init | --start-at | 2020-10-22T04:05:06Z | + + And running osm2pgsql-replication + | status | --json | + Then the standard output contains + """ + "status": 0 + "server": {"base_url": "https://planet.openstreetmap.org/replication/minute", "sequence": 347, "timestamp": "2020-10-24T03:00:00Z" + "local": {"sequence": 346, "timestamp": "2020-10-22T04:05:06Z" + """ diff --git a/tests/bdd/environment.py b/tests/bdd/environment.py index 50e597992..d13fd7bce 100644 --- a/tests/bdd/environment.py +++ b/tests/bdd/environment.py @@ -9,6 +9,7 @@ import subprocess import tempfile import importlib.util +import io from importlib.machinery import SourceFileLoader from behave import * @@ -16,6 +17,7 @@ from psycopg2 import sql from steps.geometry_factory import GeometryFactory +from steps.replication_server_mock import ReplicationServerMock TEST_BASE_DIR = (Path(__file__) / '..' 
/ '..').resolve() @@ -115,6 +117,16 @@ def before_scenario(context, scenario): context.osm2pgsql_params = [] context.workdir = use_fixture(working_directory, context) context.geometry_factory = GeometryFactory() + context.osm2pgsql_replication.ReplicationServer = ReplicationServerMock() + context.urlrequest_responses = {} + + def _mock_urlopen(request): + if not request.full_url in context.urlrequest_responses: + raise urllib.error.URLError('Unknown URL') + + return closing(io.BytesIO(context.urlrequest_responses[request.full_url].encode('utf-8'))) + + context.osm2pgsql_replication.urlrequest.urlopen = _mock_urlopen @fixture diff --git a/tests/bdd/steps/replication_server_mock.py b/tests/bdd/steps/replication_server_mock.py new file mode 100644 index 000000000..4d0223bef --- /dev/null +++ b/tests/bdd/steps/replication_server_mock.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# This file is part of osm2pgsql (https://osm2pgsql.org/). +# +# Copyright (C) 2023 by the osm2pgsql developer community. +# For a full list of authors see the git log. + + +class ReplicationServerMock: + + def __init__(self): + self.expected_base_url = None + self.state_infos = [] + + + def __call__(self, base_url): + assert self.expected_base_url is not None and base_url == self.expected_base_url,\ + f"Wrong replication service called. Expected '{self.expected_base_url}', got '{base_url}'" + return self + + + def get_state_info(self, seq=None, retries=2): + assert self.state_infos, 'Replication mock not propoerly set up' + if seq is None: + return self.state_infos[-1] + + for info in self.state_infos: + if info.sequence == seq: + return info + + assert False, f"No sequence information for sequence ID {seq}." 
+ + def timestamp_to_sequence(self, timestamp, balanced_search=False): + assert self.state_infos, 'Replication mock not propoerly set up' + + if timestamp < self.state_infos[0].timestamp: + return self.state_infos[0].sequence + + prev = self.state_infos[0] + for info in self.state_infos: + if timestamp >= prev.timestamp and timestamp < info.timestamp: + return prev.sequence + prev = info + + return prev.sequence + + def apply_diffs(self, handler, start_id, max_size=1024, idx="", simplify=True): + if start_id > self.state_infos[-1].sequence: + return None + + numdiffs = int((max_size + 1023)/1024) + return min(self.state_infos[-1].sequence, start_id + numdiffs - 1) + diff --git a/tests/bdd/steps/steps_execute.py b/tests/bdd/steps/steps_execute.py index 593763287..79638ba19 100644 --- a/tests/bdd/steps/steps_execute.py +++ b/tests/bdd/steps/steps_execute.py @@ -13,6 +13,9 @@ import subprocess import contextlib import logging +import datetime as dt + +from osmium.replication.server import OsmosisState def get_import_file(context): if context.import_file is not None: @@ -88,6 +91,13 @@ def run_osm2pgsql_replication(context): if '-d' not in cmdline and '--database' not in cmdline: cmdline.extend(('-d', context.config.userdata['TEST_DB'])) + if cmdline[0] == 'update': + cmdline.extend(('--osm2pgsql-cmd', + str(Path(context.config.userdata['BINARY']).resolve()))) + + if '--' not in cmdline: + cmdline.extend(('--', '-S', str(context.default_data_dir / 'default.style'))) + serr = StringIO() log_handler = logging.StreamHandler(serr) @@ -191,3 +201,18 @@ def check_program_output(context, kind): if line: assert line in s,\ f"Output '{line}' not found in {kind} output:\n{s}\n" + + +@given("the replication service at (?P<base_url>.*)") +def setup_replication_mock(context, base_url): + context.osm2pgsql_replication.ReplicationServer.expected_base_url = base_url + if context.table: + context.osm2pgsql_replication.ReplicationServer.state_infos =\ + [OsmosisState(int(row[0]), +
dt.datetime.strptime(row[1], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=dt.timezone.utc)) + for row in context.table] + + +@given("the URL (?P<base_url>.*) returns") +def mock_url_response(context, base_url): + context.urlrequest_responses[base_url] = context.text From ecda8156ba963da76b51c9679cce3f9106dad7a0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Aug 2023 09:48:33 +0200 Subject: [PATCH 6/9] adapt documentation of osm2pgsql-replication --- docs/osm2pgsql-replication.1 | 63 ++++++++++++++++++++++++++++------- scripts/osm2pgsql-replication | 41 ++++++++++++++++------- 2 files changed, 80 insertions(+), 24 deletions(-) diff --git a/docs/osm2pgsql-replication.1 b/docs/osm2pgsql-replication.1 index 4c8da92c9..36e3da066 100644 --- a/docs/osm2pgsql-replication.1 +++ b/docs/osm2pgsql-replication.1 @@ -41,34 +41,69 @@ usage: osm2pgsql-replication init [-h] [-q] [-v] [-d DB] [-U NAME] [-H HOST] [-P PORT] [-p PREFIX] [--middle-schema MIDDLE_SCHEMA] [--osm-file FILE | --server URL] + [--start-at TIME] Initialise the replication process. .br .br -There are two ways to initialise the replication process: if you have imported +This function sets the replication service to use and determines from .br -from a file that contains replication source information, then the +which date to apply updates. You must call this function at least once .br -initialisation process can use this and set up replication from there. +to set up the replication process. It can safely be called again later .br -Use the command \(cqosm2pgsql-replication \-\-osm\-file \(cq for this. +to change the replication servers or to roll back the update process and +.br +reapply updates. .br .br -If the file has no replication information or you don't have the initial +There are different methods available for initialisation. When no +.br +additional parameters are given, the data is initialised from the data .br -import file anymore then replication can be set up according to +in the database.
If the data was imported from a file with replication .br -the data found in the database. It checks the planet_osm_way table for the +information and the properties table is available (for osm2pgsql >= 1.9) .br -newest way in the database and then queries the OSM API when the way was +then the replication from the file is used. Otherwise the minutely .br -created. The date is used as the start date for replication. In this mode +update service from openstreetmap.org is used as the default replication .br -the minutely diffs from the OSM servers are used as a source. You can change +service. The start date is either taken from the database timestamp +.br +(for osm2pgsql >= 1.9) or determined from the newest way in the database +.br +by querying the OSM API about its creation date. .br -this with the \(cq\-\-server\(cq parameter. + +.br +The replication service can be changed with the \(cq\-\-server\(cq parameter. +.br +To use a different start date, add \(cq\-\-start\-at\(cq with an absolute +.br +ISO timestamp (e.g. 2007\-08\-20T12:21:53Z). When the program determines the +.br +start date from the database timestamp or way creation date, then it +.br +subtracts another 3 hours by default to ensure that all new changes are +.br +available. To change this rollback period, use \(cq\-\-start\-at\(cq with the +.br +number of minutes to rollback. This rollback mode can also be used to +.br +force initialisation to use the database date and ignore the date +.br +from the replication information in the file. +.br + +.br +The initialisation process can also use replication information from +.br +an OSM file directly and ignore all other date information. +.br +Use the command \(cqosm2pgsql-replication \-\-osm\-file \(cq for this. @@ -110,7 +145,11 @@ Get replication information from the given file. 
.TP \fB\-\-server\fR URL -Use replication server at the given URL (default: https://planet.openstreetmap.org/replication/minute) +Use replication server at the given URL + +.TP +\fB\-\-start\-at\fR TIME +Time when to start replication. When an absolute timestamp (in ISO format) is given, it will be used. If a number is given, then replication starts the number of minutes before the known date of the database. .SH OPTIONS 'osm2pgsql-replication update' usage: osm2pgsql-replication update update [options] [-- param [param ...]] diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index 531db2f7e..b84df6937 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -405,18 +405,35 @@ def init(props, args): """\ Initialise the replication process. - There are two ways to initialise the replication process: if you have imported - from a file that contains replication source information, then the - initialisation process can use this and set up replication from there. + This function sets the replication service to use and determines from + which date to apply updates. You must call this function at least once + to set up the replication process. It can safely be called again later + to change the replication servers or to roll back the update process and + reapply updates. + + There are different methods available for initialisation. When no + additional parameters are given, the data is initialised from the data + in the database. If the data was imported from a file with replication + information and the properties table is available (for osm2pgsql >= 1.9) + then the replication from the file is used. Otherwise the minutely + update service from openstreetmap.org is used as the default replication + service. The start date is either taken from the database timestamp + (for osm2pgsql >= 1.9) or determined from the newest way in the database + by querying the OSM API about its creation date. 
+ + The replication service can be changed with the `--server` parameter. + To use a different start date, add `--start-at` with an absolute + ISO timestamp (e.g. 2007-08-20T12:21:53Z). When the program determines the + start date from the database timestamp or way creation date, then it + subtracts another 3 hours by default to ensure that all new changes are + available. To change this rollback period, use `--start-at` with the + number of minutes to rollback. This rollback mode can also be used to + force initialisation to use the database date and ignore the date + from the replication information in the file. + + The initialisation process can also use replication information from + an OSM file directly and ignore all other date information. Use the command `%(prog)s --osm-file ` for this. - - If the file has no replication information or you don't have the initial - import file anymore then replication can be set up according to - the data found in the database. It checks the planet_osm_way table for the - newest way in the database and then queries the OSM API when the way was - created. The date is used as the start date for replication. In this mode - the minutely diffs from the OSM servers are used as a source. You can change - this with the `--server` parameter. """ if args.osm_file is None: base_url, seq, date = props.get_replication_base(args.server, args.start_at) @@ -614,7 +631,7 @@ def get_parser(): srcgrp.add_argument('--osm-file', metavar='FILE', help='Get replication information from the given file.') srcgrp.add_argument('--server', metavar='URL', - help='Use replication server at the given URL (default: %(default)s)') + help='Use replication server at the given URL') grp.add_argument('--start-at', metavar='TIME', type=start_point, help='Time when to start replication. When an absolute timestamp ' '(in ISO format) is given, it will be used. 
If a number ' From efe911ef12a090ed9a3d081802a20073d0e08bdf Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Aug 2023 09:54:29 +0200 Subject: [PATCH 7/9] osm2pgsql-replication: reintroduce default rollback --- scripts/osm2pgsql-replication | 8 +++--- tests/bdd/command-line/replication.feature | 25 ++++++++++++++++++- .../command-line/replication_legacy.feature | 2 +- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index b84df6937..1f0220cbf 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -185,9 +185,8 @@ class Osm2pgsqlProperties: "Use --start-at to set an explicit date.") date = from_osm_date(date) - if start_at is not None: - date -= dt.timedelta(minutes=start_at) - seq = None + date -= dt.timedelta(minutes=start_at or 180) + seq = None else: seq = int(seq) @@ -261,8 +260,7 @@ class LegacyProperties: except ValueError: raise DBError(1, f"Cannot parse timestamp '{date}'") - if isinstance(start_at, int): - date -= dt.timedelta(minutes=start_at) + date -= dt.timedelta(minutes=start_at or 180) return server, None, date diff --git a/tests/bdd/command-line/replication.feature b/tests/bdd/command-line/replication.feature index e2d36de5e..8629f6282 100644 --- a/tests/bdd/command-line/replication.feature +++ b/tests/bdd/command-line/replication.feature @@ -135,6 +135,29 @@ Feature: Tests for the osm2pgsql-replication script with property table | replication_timestamp | 2020-10-04T03:30:00Z | + Scenario: Replication can be initialised from database date + Given the OSM data + """ + n34 Tamenity=restaurant x77 y45.3 t2020-10-04T04:00:01Z + """ + And the replication service at https://planet.openstreetmap.org/replication/minute + | sequence | timestamp | + | 345 | 2020-10-04T01:00:00Z | + | 346 | 2020-10-04T02:00:00Z | + | 347 | 2020-10-04T03:00:00Z | + When running osm2pgsql pgsql with parameters + | --slim | + + And running osm2pgsql-replication
+ | init | + + Then table osm2pgsql_properties contains + | property | value | + | replication_base_url | https://planet.openstreetmap.org/replication/minute | + | replication_sequence_number | 345 | + | replication_timestamp | 2020-10-04T01:00:01Z | + + Scenario: Replication can be initialised with a rollback (no previous replication info) Given the OSM data """ @@ -195,7 +218,7 @@ Feature: Tests for the osm2pgsql-replication script with property table | property | value | | replication_base_url | https://custom.replication | | replication_sequence_number | 1346 | - | replication_timestamp | 2013-08-03T15:55:30Z | + | replication_timestamp | 2013-08-03T12:55:30Z | Scenario: Updates need an initialised replication diff --git a/tests/bdd/command-line/replication_legacy.feature b/tests/bdd/command-line/replication_legacy.feature index d5734f41e..5835ccac8 100644 --- a/tests/bdd/command-line/replication_legacy.feature +++ b/tests/bdd/command-line/replication_legacy.feature @@ -107,7 +107,7 @@ Feature: Tests for the osm2pgsql-replication script without property table Then table planet_osm_replication_status contains exactly | url | sequence | importdate at time zone 'UTC' | - | https://planet.openstreetmap.org/replication/minute | 346 | 2020-10-15 12:21:53 | + | https://planet.openstreetmap.org/replication/minute | 346 | 2020-10-15 09:21:53 | Scenario: Replication can be initialised from the data in the database with rollback From 1dcdf3f9bd99f8ed4351512b0de891ef88a5703e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Aug 2023 22:13:25 +0200 Subject: [PATCH 8/9] fix branching --- scripts/osm2pgsql-replication | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/osm2pgsql-replication b/scripts/osm2pgsql-replication index 1f0220cbf..aab422dca 100755 --- a/scripts/osm2pgsql-replication +++ b/scripts/osm2pgsql-replication @@ -117,10 +117,10 @@ class DBConnection: if args.database and any(part in args.database for part in ['=', 
'://']): self.conn = psycopg.connect(args.database, fallback_application_name="osm2pgsql-replication") - - self.conn = psycopg.connect(dbname=args.database, user=args.username, - host=args.host, port=args.port, - fallback_application_name="osm2pgsql-replication") + else: + self.conn = psycopg.connect(dbname=args.database, user=args.username, + host=args.host, port=args.port, + fallback_application_name="osm2pgsql-replication") self.name = self.conn.get_dsn_parameters()['dbname'] @@ -133,7 +133,7 @@ class DBConnection: def table_exists(self, table_name): with self.conn.cursor() as cur: - cur.execute('SELECT * FROM pg_tables where tablename = %s and schemaname = %s ', + cur.execute('SELECT * FROM pg_tables WHERE tablename = %s and schemaname = %s ', (table_name, self.schema)) return cur.rowcount > 0 From cb5db908c289370a2ac65ef331421709d91d078d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Aug 2023 22:20:46 +0200 Subject: [PATCH 9/9] fix typos --- tests/bdd/command-line/replication.feature | 4 ++-- tests/bdd/environment.py | 3 ++- tests/bdd/steps/replication_server_mock.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/bdd/command-line/replication.feature b/tests/bdd/command-line/replication.feature index 8629f6282..579d2ad04 100644 --- a/tests/bdd/command-line/replication.feature +++ b/tests/bdd/command-line/replication.feature @@ -35,7 +35,7 @@ Feature: Tests for the osm2pgsql-replication script with property table | replication_timestamp | 2013-08-03T19:00:02Z | - Scenario: Replication cannot be initialsed when date information is missing + Scenario: Replication cannot be initialised when date information is missing Given the OSM data """ n34 Tamenity=restaurant x77 y45.3 @@ -78,7 +78,7 @@ Feature: Tests for the osm2pgsql-replication script with property table | replication_timestamp | 2013-08-03T19:00:02Z | - Scenario: Replication initialiasion will fail for a database in a different schema + Scenario: Replication 
initialisation will fail for a database in a different schema Given the database schema foobar Given the input file 'liechtenstein-2013-08-03.osm.pbf' And the replication service at http://example.com/europe/liechtenstein-updates diff --git a/tests/bdd/environment.py b/tests/bdd/environment.py index d13fd7bce..fe8407527 100644 --- a/tests/bdd/environment.py +++ b/tests/bdd/environment.py @@ -96,7 +96,8 @@ def before_all(context): # Set up replication script. replicationfile = str(Path(context.config.userdata['REPLICATION_SCRIPT']).resolve()) spec = importlib.util.spec_from_loader('osm2pgsql_replication', - SourceFileLoader( 'osm2pgsql_replication',replicationfile)) + SourceFileLoader('osm2pgsql_replication', + replicationfile)) assert spec, f"File not found: {replicationfile}" context.osm2pgsql_replication = importlib.util.module_from_spec(spec) spec.loader.exec_module(context.osm2pgsql_replication) diff --git a/tests/bdd/steps/replication_server_mock.py b/tests/bdd/steps/replication_server_mock.py index 4d0223bef..8fbc9ec12 100644 --- a/tests/bdd/steps/replication_server_mock.py +++ b/tests/bdd/steps/replication_server_mock.py @@ -20,7 +20,7 @@ def __call__(self, base_url): def get_state_info(self, seq=None, retries=2): - assert self.state_infos, 'Replication mock not propoerly set up' + assert self.state_infos, 'Replication mock not properly set up' if seq is None: return self.state_infos[-1] @@ -31,7 +31,7 @@ def get_state_info(self, seq=None, retries=2): assert False, f"No sequence information for sequence ID {seq}." def timestamp_to_sequence(self, timestamp, balanced_search=False): - assert self.state_infos, 'Replication mock not propoerly set up' + assert self.state_infos, 'Replication mock not properly set up' if timestamp < self.state_infos[0].timestamp: return self.state_infos[0].sequence