diff --git a/conda-environment.yml b/conda-environment.yml
index 960faf44b..bd674c917 100644
--- a/conda-environment.yml
+++ b/conda-environment.yml
@@ -22,7 +22,7 @@ dependencies:
   - dask
   - pyproj >=2.5
   - shapely >=2.0
-  - jsonschema
+  - jsonschema <4.18
   - lark
   - netcdf4
   - numpy
diff --git a/datacube/drivers/postgis/_api.py b/datacube/drivers/postgis/_api.py
index df87318b6..cc5e48c17 100644
--- a/datacube/drivers/postgis/_api.py
+++ b/datacube/drivers/postgis/_api.py
@@ -389,9 +389,9 @@ def spatial_extent(self, ids, crs):
         if SpatialIndex is None:
             return None
         result = self._connection.execute(
-            select([
+            select(
                 func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent))
-            ]).select_from(
+            ).select_from(
                 SpatialIndex
             ).where(
                 SpatialIndex.dataset_ref.in_(ids)
@@ -528,12 +528,12 @@ def delete_dataset(self, dataset_id):
 
     def get_dataset(self, dataset_id):
         return self._connection.execute(
-            select(_dataset_select_fields()).where(Dataset.id == dataset_id)
+            select(*_dataset_select_fields()).where(Dataset.id == dataset_id)
         ).first()
 
     def get_datasets(self, dataset_ids):
         return self._connection.execute(
-            select(_dataset_select_fields()).where(Dataset.id.in_(dataset_ids))
+            select(*_dataset_select_fields()).where(Dataset.id.in_(dataset_ids))
         ).fetchall()
 
     def get_derived_datasets(self, dataset_id):
@@ -551,7 +551,7 @@ def search_datasets_by_metadata(self, metadata):
         """
         # Find any storage types whose 'dataset_metadata' document is a subset of the metadata.
         return self._connection.execute(
-            select(_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata))
+            select(*_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata))
         ).fetchall()
 
     def search_products_by_metadata(self, metadata):
@@ -622,7 +622,7 @@ def search_datasets_query(self,
         raw_expressions = PostgisDbAPI._alchemify_expressions(expressions)
         join_tables = PostgisDbAPI._join_tables(expressions, select_fields)
         where_expr = and_(Dataset.archived == None, *raw_expressions)
-        query = select(select_columns).select_from(Dataset)
+        query = select(*select_columns).select_from(Dataset)
         for joins in join_tables:
             query = query.join(*joins)
         if spatialquery is not None:
@@ -664,7 +664,7 @@ def bulk_simple_dataset_search(self, products=None, batch_size=0):
         if batch_size > 0 and not self.in_transaction:
             raise ValueError("Postgresql bulk reads must occur within a transaction.")
         query = select(
-            _dataset_bulk_select_fields()
+            *_dataset_bulk_select_fields()
         ).select_from(Dataset).where(
             Dataset.archived == None
         )
@@ -709,8 +709,9 @@ def get_duplicates(self, match_fields: Sequence[PgField], expressions: Sequence[
         group_expressions = tuple(f.alchemy_expression for f in match_fields)
         join_tables = PostgisDbAPI._join_tables(expressions, match_fields)
 
+        cols = (func.array_agg(Dataset.id),) + group_expressions
         query = select(
-            (func.array_agg(Dataset.id),) + group_expressions
+            *cols
         ).select_from(Dataset)
         for joins in join_tables:
             query = query.join(*joins)
@@ -763,24 +764,24 @@ def count_datasets_through_time(self, start, end, period, time_field, expression
     def count_datasets_through_time_query(self, start, end, period, time_field, expressions):
         raw_expressions = self._alchemify_expressions(expressions)
 
-        start_times = select((
+        start_times = select(
             func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
-        )).alias('start_times')
+        ).alias('start_times')
 
         time_range_select = (
-            select((
+            select(
                 func.tstzrange(
                     start_times.c.start_time,
                     func.lead(start_times.c.start_time).over()
                 ).label('time_period'),
-            ))
+            )
         ).alias('all_time_ranges')
 
         # Exclude the trailing (end time to infinite) row. Is there a simpler way?
         time_ranges = (
-            select((
+            select(
                 time_range_select,
-            )).where(
+            ).where(
                 ~func.upper_inf(time_range_select.c.time_period)
             )
         ).alias('time_ranges')
@@ -797,7 +798,7 @@ def count_datasets_through_time_query(self, start, end, period, time_field, expr
             )
         )
 
-        return select((time_ranges.c.time_period, count_query.label('dataset_count')))
+        return select(time_ranges.c.time_period, count_query.label('dataset_count'))
 
     def update_search_index(self, product_names: Sequence[str] = [], dsids: Sequence[DSID] = []):
         """
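All of the _api.py hunks above are the same SQLAlchemy 1.4/2.0 migration: the legacy select([...]) form, which took a single list or tuple of columns, is removed in 2.0-style usage, and column expressions are now passed positionally, so a precomputed sequence must be unpacked with *. A minimal sketch of the old and new spellings, using a hypothetical table in place of the mapped Dataset class (not part of the patch):

    from sqlalchemy import Column, Integer, MetaData, String, Table, select

    metadata = MetaData()
    # Hypothetical table standing in for the Dataset mapping used in _api.py.
    datasets = Table(
        "dataset", metadata,
        Column("id", Integer, primary_key=True),
        Column("product", String),
    )

    # Legacy 1.x style, removed in SQLAlchemy 2.0: a single list argument.
    # query = select([datasets.c.id, datasets.c.product])

    # 1.4/2.0 style: columns are positional arguments...
    query = select(datasets.c.id, datasets.c.product)

    # ...so a pre-built sequence of columns is unpacked, as in get_duplicates().
    cols = (datasets.c.id, datasets.c.product)
    query = select(*cols).select_from(datasets).where(datasets.c.id.in_([1, 2]))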
diff --git a/datacube/drivers/postgis/_core.py b/datacube/drivers/postgis/_core.py
index 00870799a..1c40aa353 100644
--- a/datacube/drivers/postgis/_core.py
+++ b/datacube/drivers/postgis/_core.py
@@ -11,6 +11,7 @@
 from sqlalchemy import MetaData, inspect, text
 from sqlalchemy.engine import Engine
 from sqlalchemy.schema import CreateSchema
+from sqlalchemy.sql.ddl import DropSchema
 
 from datacube.drivers.postgis.sql import (INSTALL_TRIGGER_SQL_TEMPLATE, SCHEMA_NAME,
                                           TYPES_INIT_SQL,
@@ -225,7 +226,9 @@ def has_schema(engine):
 
 
 def drop_db(connection):
-    connection.execute(text(f'drop schema if exists {SCHEMA_NAME} cascade'))
+    # if_exists parameter seems to not be working in SQLA1.4?
+    if has_schema(connection.engine):
+        connection.execute(DropSchema(SCHEMA_NAME, cascade=True, if_exists=True))
 
 
 def to_pg_role(role):
diff --git a/datacube/drivers/postgres/_core.py b/datacube/drivers/postgres/_core.py
index 61220ebe2..20c1a9e07 100644
--- a/datacube/drivers/postgres/_core.py
+++ b/datacube/drivers/postgres/_core.py
@@ -17,7 +17,7 @@
                                            pg_column_exists)
 from sqlalchemy import MetaData, inspect, text
 from sqlalchemy.engine import Engine
-from sqlalchemy.schema import CreateSchema
+from sqlalchemy.schema import CreateSchema, DropSchema
 
 USER_ROLES = ('agdc_user', 'agdc_ingest', 'agdc_manage', 'agdc_admin')
@@ -96,30 +96,29 @@ def ensure_db(engine, with_permissions=True):
             c.execute(text("""
             grant all on database {db} to agdc_admin;
             """.format(db=quoted_db_name)))
-
-        if not has_schema(engine):
-            is_new = True
-            try:
-                sqla_txn = c.begin()
-                if with_permissions:
-                    # Switch to 'agdc_admin', so that all items are owned by them.
-                    c.execute(text('set role agdc_admin'))
-                _LOG.info('Creating schema.')
-                c.execute(CreateSchema(SCHEMA_NAME))
-                _LOG.info('Creating tables.')
-                c.execute(text(TYPES_INIT_SQL))
-                METADATA.create_all(c)
-                _LOG.info("Creating triggers.")
-                install_timestamp_trigger(c)
-                _LOG.info("Creating added column.")
-                install_added_column(c)
-                sqla_txn.commit()
-            except:  # noqa: E722
-                sqla_txn.rollback()
-                raise
-            finally:
-                if with_permissions:
-                    c.execute(text('set role {}'.format(quoted_user)))
+        if not has_schema(engine):
+            is_new = True
+            try:
+                sqla_txn = c.begin()
+                if with_permissions:
+                    # Switch to 'agdc_admin', so that all items are owned by them.
+                    c.execute(text('set role agdc_admin'))
+                _LOG.info('Creating schema.')
+                c.execute(CreateSchema(SCHEMA_NAME, if_not_exists=True))
+                _LOG.info('Creating tables.')
+                c.execute(text(TYPES_INIT_SQL))
+                METADATA.create_all(c)
+                _LOG.info("Creating triggers.")
+                install_timestamp_trigger(c)
+                _LOG.info("Creating added column.")
+                install_added_column(c)
+                sqla_txn.commit()
+            except:  # noqa: E722
+                sqla_txn.rollback()
+                raise
+            finally:
+                if with_permissions:
+                    c.execute(text('set role {}'.format(quoted_user)))
 
         if with_permissions:
             _LOG.info('Adding role grants.')
@@ -239,7 +238,9 @@ def has_schema(engine):
 
 
 def drop_db(connection):
-    connection.execute(text(f'drop schema if exists {SCHEMA_NAME} cascade;'))
+    # if_exists parameter seems to not be working in SQLA1.4?
+    if has_schema(connection.engine):
+        connection.execute(DropSchema(SCHEMA_NAME, cascade=True, if_exists=True))
 
 
 def to_pg_role(role):
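Both drop_db implementations, and the CreateSchema call in ensure_db, now use SQLAlchemy's schema DDL constructs instead of raw SQL strings. For orientation, a rough standalone sketch (connection URL and schema name hypothetical, not part of the patch); the if_not_exists/if_exists flags only render IF NOT EXISTS/IF EXISTS on SQLAlchemy 2.0, while 1.4 appears to accept and ignore them, which would explain both the comment in the patch and the explicit has_schema() guard:

    from sqlalchemy import create_engine, inspect
    from sqlalchemy.schema import CreateSchema, DropSchema

    # Hypothetical connection URL; any PostgreSQL database will do.
    engine = create_engine("postgresql://localhost/datacube")

    with engine.begin() as conn:
        # Renders CREATE SCHEMA agdc (with IF NOT EXISTS on SQLAlchemy 2.0).
        conn.execute(CreateSchema("agdc", if_not_exists=True))

    with engine.begin() as conn:
        # Guarded drop, mirroring drop_db(): SQLAlchemy 1.4 accepts if_exists=True
        # but does not render IF EXISTS, so the inspection check does the real work.
        if "agdc" in inspect(conn).get_schema_names():
            conn.execute(DropSchema("agdc", cascade=True, if_exists=True))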
diff --git a/datacube/index/postgis/_datasets.py b/datacube/index/postgis/_datasets.py
index b656e923c..46eb4dfae 100755
--- a/datacube/index/postgis/_datasets.py
+++ b/datacube/index/postgis/_datasets.py
@@ -769,7 +769,7 @@ def search_summaries(self, **query):
         """
         for _, results in self._do_search_by_product(query, return_fields=True):
             for columns in results:
-                output = dict(columns)
+                output = columns._asdict()
                 _LOG.warning("search results: %s (%s)", output["id"], output["product"])
                 yield output
 
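The _asdict() change reflects SQLAlchemy 1.4's Row becoming a named-tuple-like object: dict(row) treated it as a mapping and now triggers a deprecation warning, while row._asdict() (or dict(row._mapping)) is the supported spelling. A small self-contained sketch, with a hypothetical table in place of the search-result columns (not part of the patch):

    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, select

    metadata = MetaData()
    # Hypothetical stand-in for the dataset search-result columns.
    datasets = Table("dataset", metadata, Column("id", Integer), Column("product", String))

    engine = create_engine("sqlite://")
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(datasets.insert().values(id=1, product="ls8_example"))
        for row in conn.execute(select(datasets.c.id, datasets.c.product)):
            # output = dict(row)           # legacy Row-as-mapping, deprecated in 1.4
            output = row._asdict()          # named-tuple style
            assert output == dict(row._mapping)  # equivalent mapping view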
diff --git a/docker/constraints.in b/docker/constraints.in
index 4d21517cd..67b98b60e 100644
--- a/docker/constraints.in
+++ b/docker/constraints.in
@@ -14,7 +14,7 @@ dask>=2021.10.1
 distributed>=2021.10.0
 fiona
 geoalchemy2
-jsonschema
+jsonschema<4.18
 # Was lark-parser>=0.6.7
 lark
 matplotlib
diff --git a/docker/constraints.txt b/docker/constraints.txt
index f49146132..98de0175e 100644
--- a/docker/constraints.txt
+++ b/docker/constraints.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile --strip-extras constraints.in
@@ -114,6 +114,10 @@ docutils==0.18.1
     #   sphinx
     #   sphinx-click
     #   sphinx-rtd-theme
+exceptiongroup==1.1.2
+    # via
+    #   hypothesis
+    #   pytest
 fiona==1.9.1
     # via -r constraints.in
 fonttools==4.38.0
@@ -326,6 +330,8 @@ rich==13.3.1
     # via twine
 ruamel-yaml==0.17.21
     # via -r constraints.in
+ruamel-yaml-clib==0.2.7
+    # via ruamel-yaml
 s3transfer==0.6.0
     # via boto3
 secretstorage==3.3.3
@@ -386,6 +392,11 @@ toml==0.10.2
     # via
     #   -r constraints.in
     #   responses
+tomli==2.0.1
+    # via
+    #   coverage
+    #   pytest
+    #   setuptools-scm
 toolz==0.12.0
     # via
     #   -r constraints.in
diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst
index e8509c288..dd83d2537 100644
--- a/docs/about/whats_new.rst
+++ b/docs/about/whats_new.rst
@@ -10,6 +10,7 @@ v1.8.next
 - Improve error message for mismatch between dataset metadata and product signature (:pull:`1472`)
 - Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`)
 - Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`)
+- Fix SQLAlchemy calls and pin jsonschema version to suppress deprecation warnings (:pull:`1476`)
 
 v1.8.15 (11th July 2023)
 ========================
diff --git a/setup.py b/setup.py
index 87115bad4..ef65bcb94 100755
--- a/setup.py
+++ b/setup.py
@@ -96,7 +96,7 @@
         'cloudpickle>=0.4',
         'dask[array]',
         'distributed',
-        'jsonschema',
+        'jsonschema<4.18',
         'netcdf4',
         'numpy',
         'psycopg2',
diff --git a/wordlist.txt b/wordlist.txt
index e96a7904d..2f935c5da 100644
--- a/wordlist.txt
+++ b/wordlist.txt
@@ -224,6 +224,7 @@ jfEZEOkxRXgNsAsHEC
 jpg
 JSON
 jsonify
+jsonschema
 Jupyter
 jupyter
 JupyterLab
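On the jsonschema pin that recurs through these files: jsonschema 4.18 moved $ref resolution to the new referencing library and deprecated the long-standing RefResolver API, so code that builds validators around an explicit resolver (as datacube's document validation presumably does) starts emitting DeprecationWarnings. A minimal sketch of the pattern the <4.18 pin keeps warning-free (schema contents hypothetical, not part of the patch):

    import jsonschema

    # Hypothetical self-referencing schema, standing in for datacube's
    # metadata-type schemas.
    schema = {
        "$ref": "#/definitions/doc",
        "definitions": {"doc": {"type": "object"}},
    }

    # Quiet on jsonschema < 4.18; on >= 4.18 both RefResolver and the
    # resolver= argument emit DeprecationWarning.
    resolver = jsonschema.RefResolver.from_schema(schema)
    jsonschema.Draft7Validator(schema, resolver=resolver).validate({})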