Fix deprecation warnings #1476

Merged: 6 commits, Jul 24, 2023
2 changes: 1 addition & 1 deletion conda-environment.yml
@@ -22,7 +22,7 @@ dependencies:
- dask
- pyproj >=2.5
- shapely >=2.0
- jsonschema
- jsonschema <4.18
- lark
- netcdf4
- numpy
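Note on the pin: jsonschema 4.18 moved `$ref` resolution into the separate `referencing` package and began emitting deprecation warnings for the legacy resolver API, so capping at `<4.18` keeps runs warning-free until that code path is migrated. A minimal sketch of the effect, assuming the warnings come from the legacy `RefResolver` API (not verified against datacube's validation code):

```python
# Hypothetical reproduction of the warning the pin avoids; RefResolver is
# the legacy jsonschema API that 4.18 deprecates in favour of "referencing".
import warnings
import jsonschema

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    jsonschema.RefResolver(base_uri="", referrer={})  # silent on jsonschema < 4.18

print([str(w.message) for w in caught])  # DeprecationWarning(s) on >= 4.18
```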
31 changes: 16 additions & 15 deletions datacube/drivers/postgis/_api.py
@@ -389,9 +389,9 @@ def spatial_extent(self, ids, crs):
if SpatialIndex is None:
return None
result = self._connection.execute(
select([
select(
func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent))
]).select_from(
).select_from(
SpatialIndex
).where(
SpatialIndex.dataset_ref.in_(ids)
@@ -528,12 +528,12 @@ def delete_dataset(self, dataset_id):

def get_dataset(self, dataset_id):
return self._connection.execute(
select(_dataset_select_fields()).where(Dataset.id == dataset_id)
select(*_dataset_select_fields()).where(Dataset.id == dataset_id)
).first()

def get_datasets(self, dataset_ids):
return self._connection.execute(
select(_dataset_select_fields()).where(Dataset.id.in_(dataset_ids))
select(*_dataset_select_fields()).where(Dataset.id.in_(dataset_ids))
).fetchall()

def get_derived_datasets(self, dataset_id):
@@ -551,7 +551,7 @@ def search_datasets_by_metadata(self, metadata):
"""
# Find any storage types whose 'dataset_metadata' document is a subset of the metadata.
return self._connection.execute(
select(_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata))
select(*_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata))
).fetchall()

def search_products_by_metadata(self, metadata):
@@ -622,7 +622,7 @@ def search_datasets_query(self,
raw_expressions = PostgisDbAPI._alchemify_expressions(expressions)
join_tables = PostgisDbAPI._join_tables(expressions, select_fields)
where_expr = and_(Dataset.archived == None, *raw_expressions)
query = select(select_columns).select_from(Dataset)
query = select(*select_columns).select_from(Dataset)
for joins in join_tables:
query = query.join(*joins)
if spatialquery is not None:
@@ -664,7 +664,7 @@ def bulk_simple_dataset_search(self, products=None, batch_size=0):
if batch_size > 0 and not self.in_transaction:
raise ValueError("Postgresql bulk reads must occur within a transaction.")
query = select(
_dataset_bulk_select_fields()
*_dataset_bulk_select_fields()
).select_from(Dataset).where(
Dataset.archived == None
)
@@ -709,8 +709,9 @@ def get_duplicates(self, match_fields: Sequence[PgField], expressions: Sequence[
group_expressions = tuple(f.alchemy_expression for f in match_fields)
join_tables = PostgisDbAPI._join_tables(expressions, match_fields)

cols = (func.array_agg(Dataset.id),) + group_expressions
query = select(
(func.array_agg(Dataset.id),) + group_expressions
*cols
).select_from(Dataset)
for joins in join_tables:
query = query.join(*joins)
@@ -763,24 +764,24 @@ def count_datasets_through_time(self, start, end, period, time_field, expression
def count_datasets_through_time_query(self, start, end, period, time_field, expressions):
raw_expressions = self._alchemify_expressions(expressions)

start_times = select((
start_times = select(
func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
)).alias('start_times')
).alias('start_times')

time_range_select = (
select((
select(
func.tstzrange(
start_times.c.start_time,
func.lead(start_times.c.start_time).over()
).label('time_period'),
))
)
).alias('all_time_ranges')

# Exclude the trailing (end time to infinite) row. Is there a simpler way?
time_ranges = (
select((
select(
time_range_select,
)).where(
).where(
~func.upper_inf(time_range_select.c.time_period)
)
).alias('time_ranges')
@@ -797,7 +798,7 @@ def count_datasets_through_time_query(self, start, end, period, time_field, expr
)
)

return select((time_ranges.c.time_period, count_query.label('dataset_count')))
return select(time_ranges.c.time_period, count_query.label('dataset_count'))

def update_search_index(self, product_names: Sequence[str] = [], dsids: Sequence[DSID] = []):
"""
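The `_api.py` changes above all follow the same pattern: SQLAlchemy's 2.0-style `select()` takes column expressions as positional arguments, whereas the legacy 1.x form took a single list or tuple and now triggers a deprecation warning. Helpers that return a sequence of columns therefore get unpacked with `*`. A small sketch with a toy table (not datacube's schema):

```python
# Illustrative only -- a stand-in table, not datacube's Dataset model.
from sqlalchemy import Column, Integer, MetaData, String, Table, select

metadata = MetaData()
thing = Table("thing", metadata, Column("id", Integer), Column("name", String))

fields = [thing.c.id, thing.c.name]   # e.g. what _dataset_select_fields() returns

# Legacy 1.x style (deprecated): select([thing.c.id, thing.c.name]) or select(fields)
# 2.0 style: positional column arguments, so a sequence is unpacked with *.
query = select(*fields).select_from(thing).where(thing.c.id == 1)
print(query)
```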
4 changes: 2 additions & 2 deletions datacube/drivers/postgres/_core.py
@@ -17,7 +17,7 @@
pg_column_exists)
from sqlalchemy import MetaData, inspect, text
from sqlalchemy.engine import Engine
from sqlalchemy.schema import CreateSchema
from sqlalchemy.schema import CreateSchema, DropSchema


USER_ROLES = ('agdc_user', 'agdc_ingest', 'agdc_manage', 'agdc_admin')
@@ -239,7 +239,7 @@ def has_schema(engine):


def drop_db(connection):
connection.execute(text(f'drop schema if exists {SCHEMA_NAME} cascade;'))
connection.execute(DropSchema(SCHEMA_NAME, cascade=True))


def to_pg_role(role):
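Using `DropSchema(SCHEMA_NAME, cascade=True)` replaces the hand-written `DROP SCHEMA ... CASCADE` string with the equivalent DDL construct (the explicit `IF EXISTS` from the raw SQL is not carried over). A quick sketch of the emitted SQL, with an illustrative schema name:

```python
# SCHEMA_NAME below is a placeholder for the driver's schema constant.
from sqlalchemy.dialects import postgresql
from sqlalchemy.schema import CreateSchema, DropSchema

SCHEMA_NAME = "agdc"  # assumption: illustrative value only

print(CreateSchema(SCHEMA_NAME).compile(dialect=postgresql.dialect()))
# CREATE SCHEMA agdc
print(DropSchema(SCHEMA_NAME, cascade=True).compile(dialect=postgresql.dialect()))
# DROP SCHEMA agdc CASCADE
```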
2 changes: 1 addition & 1 deletion datacube/index/postgis/_datasets.py
@@ -769,7 +769,7 @@ def search_summaries(self, **query):
"""
for _, results in self._do_search_by_product(query, return_fields=True):
for columns in results:
output = dict(columns)
output = columns._asdict()
_LOG.warning("search results: %s (%s)", output["id"], output["product"])
yield output

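In SQLAlchemy 1.4/2.0, result rows behave like named tuples rather than mappings, so `dict(columns)` is deprecated (and no longer works under 2.0); `Row._asdict()` — or `dict(row._mapping)` — is the supported way to get a column-keyed dict. A toy example, not datacube's tables:

```python
# Stand-in engine and table to illustrate the Row API used above.
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert, select

engine = create_engine("sqlite://")
metadata = MetaData()
t = Table("t", metadata, Column("id", Integer), Column("product", String))
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(insert(t).values(id=1, product="ls8"))
    row = conn.execute(select(t.c.id, t.c.product)).first()

print(row._asdict())       # {'id': 1, 'product': 'ls8'}
print(dict(row._mapping))  # equivalent, via the row's mapping view
```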
2 changes: 1 addition & 1 deletion docker/constraints.in
@@ -14,7 +14,7 @@ dask>=2021.10.1
distributed>=2021.10.0
fiona
geoalchemy2
jsonschema
jsonschema<4.18
# Was lark-parser>=0.6.7
lark
matplotlib
13 changes: 12 additions & 1 deletion docker/constraints.txt
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --strip-extras constraints.in
@@ -114,6 +114,10 @@ docutils==0.18.1
# sphinx
# sphinx-click
# sphinx-rtd-theme
exceptiongroup==1.1.2
# via
# hypothesis
# pytest
fiona==1.9.1
# via -r constraints.in
fonttools==4.38.0
@@ -326,6 +330,8 @@ rich==13.3.1
# via twine
ruamel-yaml==0.17.21
# via -r constraints.in
ruamel-yaml-clib==0.2.7
# via ruamel-yaml
s3transfer==0.6.0
# via boto3
secretstorage==3.3.3
@@ -386,6 +392,11 @@ toml==0.10.2
# via
# -r constraints.in
# responses
tomli==2.0.1
# via
# coverage
# pytest
# setuptools-scm
toolz==0.12.0
# via
# -r constraints.in
1 change: 1 addition & 0 deletions docs/about/whats_new.rst
@@ -10,6 +10,7 @@ v1.8.next
- Improve error message for mismatch between dataset metadata and product signature (:pull:`1472`)
- Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`)
- Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`)
- Fix SQLAlchemy calls and pin jsonschema version to suppress deprecation warnings (:pull:`1476`)

v1.8.15 (11th July 2023)
========================
2 changes: 1 addition & 1 deletion setup.py
@@ -96,7 +96,7 @@
'cloudpickle>=0.4',
'dask[array]',
'distributed',
'jsonschema',
'jsonschema<4.18',
'netcdf4',
'numpy',
'psycopg2',