From 2fde68a7f76419c5f2135cc260e22cc1ee9317ac Mon Sep 17 00:00:00 2001 From: Phil Varner Date: Wed, 25 May 2022 16:00:09 -0400 Subject: [PATCH 1/2] update CQL example notebook with latest CQL2 syntax --- docs/tutorials.rst | 2 +- docs/tutorials/cql-filter.ipynb | 212 ----------------- docs/tutorials/cql2-filter.ipynb | 396 +++++++++++++++++++++++++++++++ pystac_client/item_search.py | 3 +- 4 files changed, 399 insertions(+), 214 deletions(-) delete mode 100644 docs/tutorials/cql-filter.ipynb create mode 100644 docs/tutorials/cql2-filter.ipynb diff --git a/docs/tutorials.rst b/docs/tutorials.rst index f84a7e0a..a952c304 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -22,7 +22,7 @@ STAC metadata and Item geometries on a map. CQL Filtering --------------------------- -- :tutorial:`GitHub version <cql-filter.ipynb>` +- :tutorial:`GitHub version <cql2-filter.ipynb>` This tutorial gives an introduction to using CQL-JSON filtering in searches to search by arbitrary STAC Item properties. \ No newline at end of file diff --git a/docs/tutorials/cql-filter.ipynb b/docs/tutorials/cql-filter.ipynb deleted file mode 100644 index 3088310d..00000000 --- a/docs/tutorials/cql-filter.ipynb +++ /dev/null @@ -1,212 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e06a27bf", - "metadata": {}, - "source": [ - "# pystac-client CQL Filtering\n", - "\n", - "This notebook demonstrates the use of pystac-client to use [CQL Filtering](https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter). The server needs to support this and will advertise conformance as the `https://api.stacspec.org/v1.0.0-beta.3/item-search#filter:filter` class in the `conformsTo` attribute of the root API.\n", - "\n", - "**This should be considered an experimental feature. This notebook uses the Microsoft Planetary Computer staging environment as it is currently the only public CQL implementation. 
The Planetary Computer also does not advertise the correct conformance class, thus the `ignore_conformance` keyword is specified in the `Client.open` function below.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b65de617", - "metadata": {}, - "outputs": [], - "source": [ - "from pystac_client import Client\n", - "\n", - "# set pystac_client logger to DEBUG to see API calls\n", - "import logging\n", - "logging.basicConfig()\n", - "logger = logging.getLogger('pystac_client')\n", - "logger.setLevel(logging.INFO)\n", - "\n", - "# function for creating GeoDataFrame from Items\n", - "from copy import deepcopy\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "from shapely.geometry import shape\n", - "\n", - "# convert a list of STAC Items into a GeoDataFrame\n", - "def items_to_geodataframe(items):\n", - " _items = []\n", - " for i in items:\n", - " _i = deepcopy(i)\n", - " _i['geometry'] = shape(_i['geometry'])\n", - " _items.append(_i)\n", - " gdf = gpd.GeoDataFrame(pd.json_normalize(_items))\n", - " for field in ['properties.datetime', 'properties.created', 'properties.updated']:\n", - " if field in gdf:\n", - " gdf[field] = pd.to_datetime(gdf[field])\n", - " gdf.set_index('properties.datetime', inplace=True)\n", - " return gdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "98942e75", - "metadata": {}, - "outputs": [], - "source": [ - "# STAC API root URL\n", - "URL = 'https://planetarycomputer-staging.microsoft.com/api/stac/v1'\n", - "\n", - "# custom headers\n", - "headers = []\n", - "\n", - "cat = Client.open(URL, headers=headers, ignore_conformance=True)\n", - "cat" - ] - }, - { - "cell_type": "markdown", - "id": "1e16077c", - "metadata": {}, - "source": [ - "## Initial Search Parameters\n", - "\n", - "Here we perform a search with the `Client.search` function, providing a geometry (`intersects`) a datetime range (`datetime`), and filtering by Item properties (`filter`) using CQL-JSON." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8af6334", - "metadata": {}, - "outputs": [], - "source": [ - "# AOI around Delfzijl, in the north of The Netherlands\n", - "geom = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [\n", - " 6.42425537109375,\n", - " 53.174765470134616\n", - " ],\n", - " [\n", - " 7.344360351562499,\n", - " 53.174765470134616\n", - " ],\n", - " [\n", - " 7.344360351562499,\n", - " 53.67393435835391\n", - " ],\n", - " [\n", - " 6.42425537109375,\n", - " 53.67393435835391\n", - " ],\n", - " [\n", - " 6.42425537109375,\n", - " 53.174765470134616\n", - " ]\n", - " ]\n", - " ]\n", - "}\n", - "\n", - "params = {\n", - " \"collections\": \"landsat-8-c2-l2\",\n", - " \"intersects\": geom,\n", - " \"datetime\": \"2018-01-01/2020-12-31\",\n", - " \"max_items\": 100,\n", - "}\n", - "\n", - "import hvplot.pandas\n", - "import json\n", - "\n", - "# reusable search function\n", - "def search_fetch_plot(params, filt):\n", - " # limit sets the # of items per page so we can see multiple pages getting fetched\n", - " params['filter'] = filt\n", - " search = cat.search(**params)\n", - " items_json = search.get_all_items_as_dict()\n", - " # DataFrame\n", - " items_df = pd.DataFrame(items_to_geodataframe(items_json['features']))\n", - " print(f\"{len(items_df.index)} items found\")\n", - " field = 'properties.eo:cloud_cover'\n", - " return items_df.hvplot(y=field, label=json.dumps(filt), frame_height=500, frame_width=800) " - ] - }, - { - "cell_type": "markdown", - "id": "44d3bc04", - "metadata": {}, - "source": [ - "## CQL Filters\n", - "\n", - "Below are examples of several different CQL filters on the `eo:cloud_cover` property. 
Up to 100 Items are fetched and the eo:cloud_cover values plotted.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dfc0e759", - "metadata": {}, - "outputs": [], - "source": [ - "filt = {\n", - " \"lte\": [{\"property\": \"eo:cloud_cover\"}, 10]\n", - "}\n", - "\n", - "search_fetch_plot(params, filt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c2f9ca1", - "metadata": {}, - "outputs": [], - "source": [ - "filt = {\n", - " \"gte\": [{\"property\": \"eo:cloud_cover\"}, 80]\n", - "}\n", - "\n", - "search_fetch_plot(params, filt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "109f673c", - "metadata": {}, - "outputs": [], - "source": [ - "filt = {\n", - " \"lte\": [{\"property\": \"eo:cloud_cover\"}, 60],\n", - " \"gte\": [{\"property\": \"eo:cloud_cover\"}, 40]\n", - "}\n", - "\n", - "search_fetch_plot(params, filt)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/tutorials/cql2-filter.ipynb b/docs/tutorials/cql2-filter.ipynb new file mode 100644 index 00000000..d55f2260 --- /dev/null +++ b/docs/tutorials/cql2-filter.ipynb @@ -0,0 +1,396 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e06a27bf", + "metadata": {}, + "source": [ + "# pystac-client CQL2 Filtering\n", + "\n", + "This notebook demonstrates how to use pystac-client with [CQL2 filtering](https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter). 
The server needs to support this and advertise conformance as the `https://api.stacspec.org/v1.0.0-rc.1/item-search#filter` class in the `conformsTo` attribute of the root API.\n", + "\n", + "**This should be considered an experimental feature. This notebook uses the Microsoft Planetary Computer API, as it is currently the only public CQL2 implementation. The Planetary Computer API also does not yet advertise the correct conformance class, thus the `ignore_conformance` keyword is specified in the `Client.open` function below.**" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b65de617", + "metadata": {}, + "outputs": [], + "source": [ + "from pystac_client import Client\n", + "\n", + "# set pystac_client logger to DEBUG to see API calls\n", + "import logging\n", + "logging.basicConfig()\n", + "logger = logging.getLogger('pystac_client')\n", + "logger.setLevel(logging.INFO)\n", + "\n", + "# function for creating GeoDataFrame from Items\n", + "from copy import deepcopy\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from shapely.geometry import shape\n", + "\n", + "# convert a list of STAC Items into a GeoDataFrame\n", + "def items_to_geodataframe(items):\n", + " _items = []\n", + " for i in items:\n", + " _i = deepcopy(i)\n", + " _i['geometry'] = shape(_i['geometry'])\n", + " _items.append(_i)\n", + " gdf = gpd.GeoDataFrame(pd.json_normalize(_items))\n", + " for field in ['properties.datetime', 'properties.created', 'properties.updated']:\n", + " if field in gdf:\n", + " gdf[field] = pd.to_datetime(gdf[field])\n", + " gdf.set_index('properties.datetime', inplace=True)\n", + " return gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "98942e75", + "metadata": {}, + "outputs": [], + "source": [ + "# STAC API root URL\n", + "URL = 'https://planetarycomputer.microsoft.com/api/stac/v1'\n", + "\n", + "# custom headers\n", + "headers = []\n", + "\n", + "cat = Client.open(URL, headers=headers, 
ignore_conformance=True)" + ] + }, + { + "cell_type": "markdown", + "id": "1e16077c", + "metadata": {}, + "source": [ + "## Initial Search Parameters\n", + "\n", + "Here we perform a search with the `Client.search` function, providing a geometry (`intersects`), a datetime range (`datetime`), and filtering by Item properties (`filter`) using CQL2-JSON." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d8af6334", + "metadata": {}, + "outputs": [], + "source": [ + "# AOI around Delfzijl, in the north of The Netherlands\n", + "geom = {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [\n", + " 6.42425537109375,\n", + " 53.174765470134616\n", + " ],\n", + " [\n", + " 7.344360351562499,\n", + " 53.174765470134616\n", + " ],\n", + " [\n", + " 7.344360351562499,\n", + " 53.67393435835391\n", + " ],\n", + " [\n", + " 6.42425537109375,\n", + " 53.67393435835391\n", + " ],\n", + " [\n", + " 6.42425537109375,\n", + " 53.174765470134616\n", + " ]\n", + " ]\n", + " ]\n", + "}\n", + "\n", + "params = {\n", + " \"collections\": \"landsat-8-c2-l2\",\n", + " \"intersects\": geom,\n", + " \"datetime\": \"2018-01-01/2020-12-31\",\n", + " \"max_items\": 100,\n", + "}\n", + "\n", + "import hvplot.pandas\n", + "import json\n", + "\n", + "# reusable search function\n", + "def search_fetch_plot(params, filt):\n", + " # limit sets the # of items per page so we can see multiple pages getting fetched\n", + " params['filter'] = filt\n", + " search = cat.search(**params)\n", + " items_json = search.get_all_items_as_dict()\n", + " # DataFrame\n", + " items_df = pd.DataFrame(items_to_geodataframe(items_json['features']))\n", + " print(f\"{len(items_df.index)} items found\")\n", + " field = 'properties.eo:cloud_cover'\n", + " return items_df.hvplot(y=field, label=json.dumps(filt), frame_height=500, frame_width=800) " + ] + }, + { + "cell_type": "markdown", + "id": "44d3bc04", + "metadata": {}, + "source": [ + "## CQL2 Filters\n", + "\n", + "Below are 
examples of several different CQL2 filters on the `eo:cloud_cover` property. Up to 100 Items are fetched and the eo:cloud_cover values plotted." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dfc0e759", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "33 items found\n" + ] + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Curve [properties.datetime] (properties.eo:cloud_cover)" + ] + }, + "execution_count": 18, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2290" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "filt = {\n", + " \"op\": \"lte\",\n", + " \"args\": [{\"property\": \"eo:cloud_cover\"}, 10]\n", + "}\n", + "\n", + "search_fetch_plot(params, filt)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "9c2f9ca1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "92 items found\n" + ] + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Curve [properties.datetime] (properties.eo:cloud_cover)" + ] + }, + "execution_count": 19, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2474" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "filt = {\n", + " \"op\": \"gte\",\n", + " \"args\" : [{\"property\": \"eo:cloud_cover\"}, 80]\n", + "}\n", + "\n", + "search_fetch_plot(params, filt)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "109f673c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "41 items found\n" + ] + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Curve [properties.datetime] (properties.eo:cloud_cover)" + ] + }, + "execution_count": 20, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2658" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "filt = { \n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " { \n", + " \"op\":\"lte\", \n", + " \"args\": [{\"property\": \"eo:cloud_cover\"}, 60]\n", + " },\n", + " { \n", + " \"op\": \"gte\", \n", + " \"args\": [{\"property\": \"eo:cloud_cover\"}, 40]\n", + " }\n", + " ]\n", + "}\n", + "\n", + "search_fetch_plot(params, filt)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "6b6313dbab648ff537330b996f33bf845c0da10ea77ae70864d6ca8e2699c7ea" + }, + "kernelspec": { + "display_name": "Python 3.9.11 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pystac_client/item_search.py b/pystac_client/item_search.py index a497a4ea..c2f34f21 100644 --- a/pystac_client/item_search.py +++ b/pystac_client/item_search.py @@ -139,7 +139,8 @@ class ItemSearch: of the provided Collections will be searched query: List or JSON of query parameters as per the STAC API `query` extension filter: JSON of query parameters as per the STAC API `filter` extension - filter_lang: Language variant used in the filter body. If `filter` is a dictionary or not provided, defaults to 'cql2-json'. If `filter` is a string, defaults to `cql2-text`. + filter_lang: Language variant used in the filter body. If `filter` is a dictionary or not provided, defaults + to 'cql2-json'. If `filter` is a string, defaults to `cql2-text`. 
sortby: A single field or list of fields to sort the response by fields: A list of fields to return in the response. Note this may result in invalid JSON. Use `get_all_items_as_dict` to avoid errors From 74fff7fb7a48ad8fd0be2e15c1bb0024553f7e8c Mon Sep 17 00:00:00 2001 From: Phil Varner Date: Wed, 25 May 2022 16:59:57 -0400 Subject: [PATCH 2/2] update libraries required to run notebooks --- requirements-docs.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements-docs.txt b/requirements-docs.txt index 25cc64c1..e3db0006 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -5,3 +5,5 @@ sphinxcontrib-fulltoc~=1.2 myst-parser~=0.15.2 nbsphinx~=0.8 jinja2<4.0 +geopandas~=0.10.2 +hvplot~=0.8.0 \ No newline at end of file