From cc2481a2a960520e5b14bd3d128d7c697c59e058 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Fri, 21 Jul 2023 13:03:42 +1000 Subject: [PATCH 1/6] Suppress SQLAlchemy warnings. --- datacube/drivers/postgis/_api.py | 28 ++++++++++++++-------------- datacube/drivers/postgres/_core.py | 4 ++-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/datacube/drivers/postgis/_api.py b/datacube/drivers/postgis/_api.py index df87318b6..de71842e6 100644 --- a/datacube/drivers/postgis/_api.py +++ b/datacube/drivers/postgis/_api.py @@ -389,9 +389,9 @@ def spatial_extent(self, ids, crs): if SpatialIndex is None: return None result = self._connection.execute( - select([ + select( func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent)) - ]).select_from( + ).select_from( SpatialIndex ).where( SpatialIndex.dataset_ref.in_(ids) @@ -528,12 +528,12 @@ def delete_dataset(self, dataset_id): def get_dataset(self, dataset_id): return self._connection.execute( - select(_dataset_select_fields()).where(Dataset.id == dataset_id) + select(*_dataset_select_fields()).where(Dataset.id == dataset_id) ).first() def get_datasets(self, dataset_ids): return self._connection.execute( - select(_dataset_select_fields()).where(Dataset.id.in_(dataset_ids)) + select(*_dataset_select_fields()).where(Dataset.id.in_(dataset_ids)) ).fetchall() def get_derived_datasets(self, dataset_id): @@ -551,7 +551,7 @@ def search_datasets_by_metadata(self, metadata): """ # Find any storage types whose 'dataset_metadata' document is a subset of the metadata. 
return self._connection.execute( - select(_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata)) + select(*_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata)) ).fetchall() def search_products_by_metadata(self, metadata): @@ -622,7 +622,7 @@ def search_datasets_query(self, raw_expressions = PostgisDbAPI._alchemify_expressions(expressions) join_tables = PostgisDbAPI._join_tables(expressions, select_fields) where_expr = and_(Dataset.archived == None, *raw_expressions) - query = select(select_columns).select_from(Dataset) + query = select(*select_columns).select_from(Dataset) for joins in join_tables: query = query.join(*joins) if spatialquery is not None: @@ -664,7 +664,7 @@ def bulk_simple_dataset_search(self, products=None, batch_size=0): if batch_size > 0 and not self.in_transaction: raise ValueError("Postgresql bulk reads must occur within a transaction.") query = select( - _dataset_bulk_select_fields() + *_dataset_bulk_select_fields() ).select_from(Dataset).where( Dataset.archived == None ) @@ -763,24 +763,24 @@ def count_datasets_through_time(self, start, end, period, time_field, expression def count_datasets_through_time_query(self, start, end, period, time_field, expressions): raw_expressions = self._alchemify_expressions(expressions) - start_times = select(( + start_times = select( func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'), - )).alias('start_times') + ).alias('start_times') time_range_select = ( - select(( + select( func.tstzrange( start_times.c.start_time, func.lead(start_times.c.start_time).over() ).label('time_period'), - )) + ) ).alias('all_time_ranges') # Exclude the trailing (end time to infinite) row. Is there a simpler way? 
time_ranges = ( - select(( + select( time_range_select, - )).where( + ).where( ~func.upper_inf(time_range_select.c.time_period) ) ).alias('time_ranges') @@ -797,7 +797,7 @@ def count_datasets_through_time_query(self, start, end, period, time_field, expr ) ) - return select((time_ranges.c.time_period, count_query.label('dataset_count'))) + return select(time_ranges.c.time_period, count_query.label('dataset_count')) def update_search_index(self, product_names: Sequence[str] = [], dsids: Sequence[DSID] = []): """ diff --git a/datacube/drivers/postgres/_core.py b/datacube/drivers/postgres/_core.py index 61220ebe2..e79b075cb 100644 --- a/datacube/drivers/postgres/_core.py +++ b/datacube/drivers/postgres/_core.py @@ -17,7 +17,7 @@ pg_column_exists) from sqlalchemy import MetaData, inspect, text from sqlalchemy.engine import Engine -from sqlalchemy.schema import CreateSchema +from sqlalchemy.schema import CreateSchema, DropSchema USER_ROLES = ('agdc_user', 'agdc_ingest', 'agdc_manage', 'agdc_admin') @@ -239,7 +239,7 @@ def has_schema(engine): def drop_db(connection): - connection.execute(text(f'drop schema if exists {SCHEMA_NAME} cascade;')) + connection.execute(DropSchema(SCHEMA_NAME, cascade=True)) def to_pg_role(role): From 222a2c0f45d6a6e459ea8e55c3106fffcaefa266 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Fri, 21 Jul 2023 13:19:17 +1000 Subject: [PATCH 2/6] A couple more SQLAlchemy warnings suppressed. 
--- datacube/drivers/postgis/_api.py | 3 ++- datacube/index/postgis/_datasets.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datacube/drivers/postgis/_api.py b/datacube/drivers/postgis/_api.py index de71842e6..cc5e48c17 100644 --- a/datacube/drivers/postgis/_api.py +++ b/datacube/drivers/postgis/_api.py @@ -709,8 +709,9 @@ def get_duplicates(self, match_fields: Sequence[PgField], expressions: Sequence[ group_expressions = tuple(f.alchemy_expression for f in match_fields) join_tables = PostgisDbAPI._join_tables(expressions, match_fields) + cols = (func.array_agg(Dataset.id),) + group_expressions query = select( - (func.array_agg(Dataset.id),) + group_expressions + *cols ).select_from(Dataset) for joins in join_tables: query = query.join(*joins) diff --git a/datacube/index/postgis/_datasets.py b/datacube/index/postgis/_datasets.py index b656e923c..46eb4dfae 100755 --- a/datacube/index/postgis/_datasets.py +++ b/datacube/index/postgis/_datasets.py @@ -769,7 +769,7 @@ def search_summaries(self, **query): """ for _, results in self._do_search_by_product(query, return_fields=True): for columns in results: - output = dict(columns) + output = columns._asdict() _LOG.warning("search results: %s (%s)", output["id"], output["product"]) yield output From 98eeb6edb3a31f248fa48c7fd5f76700b66a7285 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Mon, 24 Jul 2023 10:55:25 +1000 Subject: [PATCH 3/6] Pin jsonschema to <4.18 to prevent deprecation warnings. 
--- conda-environment.yml | 2 +- docker/constraints.in | 2 +- docker/constraints.txt | 13 ++++++++++++- setup.py | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/conda-environment.yml b/conda-environment.yml index 960faf44b..bd674c917 100644 --- a/conda-environment.yml +++ b/conda-environment.yml @@ -22,7 +22,7 @@ dependencies: - dask - pyproj >=2.5 - shapely >=2.0 - - jsonschema + - jsonschema <4.18 - lark - netcdf4 - numpy diff --git a/docker/constraints.in b/docker/constraints.in index 4d21517cd..67b98b60e 100644 --- a/docker/constraints.in +++ b/docker/constraints.in @@ -14,7 +14,7 @@ dask>=2021.10.1 distributed>=2021.10.0 fiona geoalchemy2 -jsonschema +jsonschema<4.18 # Was lark-parser>=0.6.7 lark matplotlib diff --git a/docker/constraints.txt b/docker/constraints.txt index f49146132..98de0175e 100644 --- a/docker/constraints.txt +++ b/docker/constraints.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --strip-extras constraints.in @@ -114,6 +114,10 @@ docutils==0.18.1 # sphinx # sphinx-click # sphinx-rtd-theme +exceptiongroup==1.1.2 + # via + # hypothesis + # pytest fiona==1.9.1 # via -r constraints.in fonttools==4.38.0 @@ -326,6 +330,8 @@ rich==13.3.1 # via twine ruamel-yaml==0.17.21 # via -r constraints.in +ruamel-yaml-clib==0.2.7 + # via ruamel-yaml s3transfer==0.6.0 # via boto3 secretstorage==3.3.3 @@ -386,6 +392,11 @@ toml==0.10.2 # via # -r constraints.in # responses +tomli==2.0.1 + # via + # coverage + # pytest + # setuptools-scm toolz==0.12.0 # via # -r constraints.in diff --git a/setup.py b/setup.py index 87115bad4..ef65bcb94 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ 'cloudpickle>=0.4', 'dask[array]', 'distributed', - 'jsonschema', + 'jsonschema<4.18', 'netcdf4', 'numpy', 'psycopg2', From f8011ce2312ba09972d73b4b4e70491643927ae3 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: 
Mon, 24 Jul 2023 10:57:39 +1000 Subject: [PATCH 4/6] Update whats_new.rst. --- docs/about/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index e8509c288..ebf797367 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -10,6 +10,7 @@ v1.8.next - Improve error message for mismatch between dataset metadata and product signature (:pull:`1472`) - Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`) - Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`) +- Fix SQLAlchemy calls and pin jsonschema version to supporess deprecation warnings (:pull:`1476`) v1.8.15 (11th July 2023) ======================== From 48a22e5324f72a06d7c31bfa89216f6f9e3a03ae Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Mon, 24 Jul 2023 11:38:02 +1000 Subject: [PATCH 5/6] Fixed spelling mistake in whats_new and fixed drop schema where the schema does not exist. --- datacube/drivers/postgis/_core.py | 5 ++++- datacube/drivers/postgres/_core.py | 5 +++-- docs/about/whats_new.rst | 2 +- wordlist.txt | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/datacube/drivers/postgis/_core.py b/datacube/drivers/postgis/_core.py index 00870799a..1c40aa353 100644 --- a/datacube/drivers/postgis/_core.py +++ b/datacube/drivers/postgis/_core.py @@ -11,6 +11,7 @@ from sqlalchemy import MetaData, inspect, text from sqlalchemy.engine import Engine from sqlalchemy.schema import CreateSchema +from sqlalchemy.sql.ddl import DropSchema from datacube.drivers.postgis.sql import (INSTALL_TRIGGER_SQL_TEMPLATE, SCHEMA_NAME, TYPES_INIT_SQL, @@ -225,7 +226,9 @@ def has_schema(engine): def drop_db(connection): - connection.execute(text(f'drop schema if exists {SCHEMA_NAME} cascade')) + # if_exists parameter seems to not be working in SQLA1.4? 
+ if has_schema(connection.engine): + connection.execute(DropSchema(SCHEMA_NAME, cascade=True, if_exists=True)) def to_pg_role(role): diff --git a/datacube/drivers/postgres/_core.py b/datacube/drivers/postgres/_core.py index e79b075cb..a14f7f36c 100644 --- a/datacube/drivers/postgres/_core.py +++ b/datacube/drivers/postgres/_core.py @@ -97,7 +97,6 @@ def ensure_db(engine, with_permissions=True): grant all on database {db} to agdc_admin; """.format(db=quoted_db_name))) - if not has_schema(engine): is_new = True try: sqla_txn = c.begin() @@ -239,7 +238,9 @@ def has_schema(engine): def drop_db(connection): - connection.execute(DropSchema(SCHEMA_NAME, cascade=True)) + # if_exists parameter seems to not be working in SQLA1.4? + if has_schema(connection.engine): + connection.execute(DropSchema(SCHEMA_NAME, cascade=True, if_exists=True)) def to_pg_role(role): diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index ebf797367..dd83d2537 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -10,7 +10,7 @@ v1.8.next - Improve error message for mismatch between dataset metadata and product signature (:pull:`1472`) - Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`) - Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`) -- Fix SQLAlchemy calls and pin jsonschema version to supporess deprecation warnings (:pull:`1476`) +- Fix SQLAlchemy calls and pin jsonschema version to suppress deprecation warnings (:pull:`1476`) v1.8.15 (11th July 2023) ======================== diff --git a/wordlist.txt b/wordlist.txt index e96a7904d..2f935c5da 100644 --- a/wordlist.txt +++ b/wordlist.txt @@ -224,6 +224,7 @@ jfEZEOkxRXgNsAsHEC jpg JSON jsonify +jsonschema Jupyter jupyter JupyterLab From ac52a4a2f3b6b55efe2f2c5a525f74c258cab914 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Mon, 24 Jul 2023 12:05:22 +1000 Subject: [PATCH 6/6] SQLAlchemy 
create/drop schema doesn't seem to support "if (not) exists" clauses properly in 1.4. --- datacube/drivers/postgres/_core.py | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/datacube/drivers/postgres/_core.py b/datacube/drivers/postgres/_core.py index a14f7f36c..20c1a9e07 100644 --- a/datacube/drivers/postgres/_core.py +++ b/datacube/drivers/postgres/_core.py @@ -96,29 +96,29 @@ def ensure_db(engine, with_permissions=True): c.execute(text(""" grant all on database {db} to agdc_admin; """.format(db=quoted_db_name))) - - is_new = True - try: - sqla_txn = c.begin() - if with_permissions: - # Switch to 'agdc_admin', so that all items are owned by them. - c.execute(text('set role agdc_admin')) - _LOG.info('Creating schema.') - c.execute(CreateSchema(SCHEMA_NAME)) - _LOG.info('Creating tables.') - c.execute(text(TYPES_INIT_SQL)) - METADATA.create_all(c) - _LOG.info("Creating triggers.") - install_timestamp_trigger(c) - _LOG.info("Creating added column.") - install_added_column(c) - sqla_txn.commit() - except: # noqa: E722 - sqla_txn.rollback() - raise - finally: - if with_permissions: - c.execute(text('set role {}'.format(quoted_user))) + if not has_schema(engine): + is_new = True + try: + sqla_txn = c.begin() + if with_permissions: + # Switch to 'agdc_admin', so that all items are owned by them. + c.execute(text('set role agdc_admin')) + _LOG.info('Creating schema.') + c.execute(CreateSchema(SCHEMA_NAME, if_not_exists=True)) + _LOG.info('Creating tables.') + c.execute(text(TYPES_INIT_SQL)) + METADATA.create_all(c) + _LOG.info("Creating triggers.") + install_timestamp_trigger(c) + _LOG.info("Creating added column.") + install_added_column(c) + sqla_txn.commit() + except: # noqa: E722 + sqla_txn.rollback() + raise + finally: + if with_permissions: + c.execute(text('set role {}'.format(quoted_user))) if with_permissions: _LOG.info('Adding role grants.')