From 41e227ff95a7cba1f5e87934321f14b826d93949 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 25 Jan 2023 20:04:58 -0600 Subject: [PATCH] Added s2 baseline change notebook --- datasets/sentinel-2-l2a/baseline-change.ipynb | 900 ++++++++++++++++++ 1 file changed, 900 insertions(+) create mode 100755 datasets/sentinel-2-l2a/baseline-change.ipynb diff --git a/datasets/sentinel-2-l2a/baseline-change.ipynb b/datasets/sentinel-2-l2a/baseline-change.ipynb new file mode 100755 index 00000000..f69d7f98 --- /dev/null +++ b/datasets/sentinel-2-l2a/baseline-change.ipynb @@ -0,0 +1,900 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b7d2e416-6fae-4e13-9de3-149f333c0048", + "metadata": {}, + "source": [ + "## Adjusting for the Sentinel-2 Baseline Change\n", + "\n", + "On January 25th, 2022 the European Space Agency implemented a [change to the processing baseline](https://sentinels.copernicus.eu/web/sentinel/-/copernicus-sentinel-2-major-products-upgrade-upcoming) for Sentinel-2 data that affects both L1C and the L2A data hosted by the Planetary Computer.\n", + "\n", + "The crux of the change is an offset to the L1C image reflectance data, which propagates to the L2A data hosted by the Planetary Computer.\n", + "\n", + "As explained by ESA:\n", + "\n", + "> This evolution allows avoiding the loss of information due to clamping of negative values in the predefined range `[1-32767]` occurring over dark surfaces.\n", + "\n", + "The presence of this offset in newer data makes it inappropraite to (naively) compare data from before and after the baseline change. Trying to apply a machine learning model trained on the old baseline to (unadjusted) imagery from the new baseline will yield incorrect results. And trying to train a machine learning model on old and (unadjusted) new data will likely result in much worse performance.\n", + "\n", + "This notebook demonstrates how the change affects the Sentinel-2 L2A data hosted on the Planetary Computer and how to harmonize the new data to be comparable to the old data." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b30543ce-24e0-4f84-a046-d137c06ba896", + "metadata": {}, + "outputs": [], + "source": [ + "import pystac_client\n", + "import planetary_computer\n", + "import xarray as xr\n", + "import matplotlib.pyplot as plt\n", + "import skimage.exposure\n", + "import seaborn as sns\n", + "import stackstac\n", + "import datetime" + ] + }, + { + "cell_type": "markdown", + "id": "e4b0be43-3b54-4188-b2e6-6097838eade4", + "metadata": {}, + "source": [ + "The `harmonize_to_old` function harmonizes new (post January 25th, 2022) Sentinel 2 data to be compoarable to older data. It expects an xarray DataArray similar to those returned by `stackstac`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "89595090-7ead-4f8f-976a-602a226141fe", + "metadata": {}, + "outputs": [], + "source": [ + "def harmonize_to_old(data):\n", + " \"\"\"\n", + " Harmonize new Sentinel-2 data to the old baseline.\n", + "\n", + " Parameters\n", + " ----------\n", + " data: xarray.DataArray\n", + " A DataArray with four dimensions: time, band, y, x\n", + "\n", + " Returns\n", + " -------\n", + " harmonized: xarray.DataArray\n", + " A DataArray with all values harmonized to the old\n", + " processing baseline.\n", + " \"\"\"\n", + " cutoff = datetime.datetime(2022, 1, 25)\n", + " offset = 1000\n", + " bands = [\n", + " \"B01\",\n", + " \"B02\",\n", + " \"B03\",\n", + " \"B04\",\n", + " \"B05\",\n", + " \"B06\",\n", + " \"B07\",\n", + " \"B08\",\n", + " \"B8A\",\n", + " \"B09\",\n", + " \"B10\",\n", + " \"B11\",\n", + " \"B12\",\n", + " ]\n", + "\n", + " old = data.sel(time=slice(cutoff))\n", + "\n", + " to_process = list(set(bands) & set(data.band.data.tolist()))\n", + " new = data.sel(time=slice(cutoff, None)).drop_sel(band=to_process)\n", + "\n", + " new_harmonized = data.sel(time=slice(cutoff, None), band=to_process).copy()\n", + " new_harmonized.clip(offset)\n", + " new_harmonized -= offset\n", + "\n", + " new = xr.concat([new, new_harmonized], \"band\").sel(band=data.band.data.tolist())\n", + " return xr.concat([old, new], dim=\"time\")" + ] + }, + { + "cell_type": "markdown", + "id": "ed2742fb-08bb-45c0-92d1-34e329eedae2", + "metadata": {}, + "source": [ + "To demonstrate the problem, we'll load an \"old\" scene with the old processing baseline of `03.00`, and a \"new\" scene with a new processing baseline of `04.00`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bd7b7501-c1d9-43fb-ab44-b6c4b6628bbc", + "metadata": {}, + "outputs": [], + "source": [ + "s2 = pystac_client.Client.open(\n", + " \"https://planetarycomputer.microsoft.com/api/stac/v1/\",\n", + " modifier=planetary_computer.sign_inplace,\n", + ").get_collection(\"sentinel-2-l2a\")\n", + "\n", + "old = s2.get_item(\"S2B_MSIL2A_20220106T021339_R060_T50KPC_20220107T033435\")\n", + "new = s2.get_item(\"S2A_MSIL2A_20230116T021341_R060_T50KPC_20230116T112621\")" + ] + }, + { + "cell_type": "markdown", + "id": "e66dd073-dc01-401b-9f28-ad042b5c3d0b", + "metadata": {}, + "source": [ + "We'll stack these together, cropping out a small section of the image to speed up loading." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9eeff95b-c7cc-457d-bc30-1fd6d55d8830", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'stackstac-48d41966479ec31b57df72e75da3841b' (time: 2,\n",
+       "                                                                band: 6,\n",
+       "                                                                y: 1000, x: 1000)>\n",
+       "dask.array<fetch_raster_window, shape=(2, 6, 1000, 1000), dtype=float64, chunksize=(1, 1, 1000, 1000), chunktype=numpy.ndarray>\n",
+       "Coordinates: (12/44)\n",
+       "  * time                                     (time) datetime64[ns] 2022-01-06...\n",
+       "    id                                       (time) <U54 'S2B_MSIL2A_20220106...\n",
+       "  * band                                     (band) <U3 'B02' 'B03' ... 'AOT'\n",
+       "  * x                                        (x) float64 6e+05 6e+05 ... 6.1e+05\n",
+       "  * y                                        (y) float64 7.7e+06 ... 7.69e+06\n",
+       "    sat:orbit_state                          <U10 'descending'\n",
+       "    ...                                       ...\n",
+       "    proj:bbox                                object {600000.0, 7690240.0, 780...\n",
+       "    gsd                                      (band) float64 10.0 10.0 ... 10.0\n",
+       "    common_name                              (band) object 'blue' ... None\n",
+       "    center_wavelength                        (band) object 0.49 0.56 ... None\n",
+       "    full_width_half_max                      (band) object 0.098 0.045 ... None\n",
+       "    epsg                                     int64 32750\n",
+       "Attributes:\n",
+       "    spec:        RasterSpec(epsg=32750, bounds=(600000.0, 7690240.0, 610000.0...\n",
+       "    crs:         epsg:32750\n",
+       "    transform:   | 10.00, 0.00, 600000.00|\\n| 0.00,-10.00, 7700240.00|\\n| 0.0...\n",
+       "    resolution:  10.0
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates: (12/44)\n", + " * time (time) datetime64[ns] 2022-01-06...\n", + " id (time) " + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(ncols=2, figsize=(14, 6))\n", + "\n", + "img = ds.sel(band=[\"B04\", \"B03\", \"B02\"]).compute()\n", + "img = xr.apply_ufunc(skimage.exposure.rescale_intensity, img)\n", + "\n", + "img.isel(time=0).plot.imshow(rgb=\"band\", ax=axes[0])\n", + "axes[0].set(title=\"Old\")\n", + "\n", + "img.isel(time=1).plot.imshow(rgb=\"band\", ax=axes[1])\n", + "axes[1].set(title=\"New\")\n", + "for ax in axes:\n", + " ax.set_axis_off()" + ] + }, + { + "cell_type": "markdown", + "id": "db22b0b0-b421-46c8-87ba-318bc084bfc7", + "metadata": {}, + "source": [ + "By taking the histogram of, say, the green band, we'll see that the new data are shifted to the right." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5e8b8eac-54bb-45a8-8677-21184958e5f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(12, 8))\n", + "\n", + "hist, bins = skimage.exposure.histogram(ds.isel(time=0, band=1).data.compute())\n", + "ax.plot(bins, hist, label=\"Old\")\n", + "\n", + "hist, bins = skimage.exposure.histogram(ds.isel(time=1, band=1).data.compute())\n", + "ax.plot(bins, hist, label=\"New\")\n", + "plt.legend()\n", + "sns.despine()" + ] + }, + { + "cell_type": "markdown", + "id": "3d0058aa-9459-48c2-a9bb-ab817a5947fc", + "metadata": {}, + "source": [ + "Now, let's harmonize data, shifting the new data to make it comparable to the old." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d6d71030-e747-4c08-83f2-2809b0388031", + "metadata": {}, + "outputs": [], + "source": [ + "harmonized = harmonize_to_old(ds)" + ] + }, + { + "cell_type": "markdown", + "id": "d966f3e0-d86a-4f1f-ab06-2518b8213e60", + "metadata": {}, + "source": [ + "Now the RGB images look much more similar:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f374c97b-9ca7-4437-bfef-1c0a03428a2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(ncols=2, figsize=(14, 6))\n", + "\n", + "img = harmonized.sel(band=[\"B04\", \"B03\", \"B02\"]).compute()\n", + "img = xr.apply_ufunc(skimage.exposure.rescale_intensity, img)\n", + "\n", + "img.isel(time=0).plot.imshow(rgb=\"band\", ax=axes[0])\n", + "axes[0].set(title=\"Old\")\n", + "\n", + "img.isel(time=1).plot.imshow(rgb=\"band\", ax=axes[1])\n", + "axes[1].set(title=\"New\")\n", + "for ax in axes:\n", + " ax.set_axis_off()" + ] + }, + { + "cell_type": "markdown", + "id": "0cae2347-76c9-4822-a481-c5ba5bb108ce", + "metadata": {}, + "source": [ + "And the histograms now mostly overlap. Any remaining differences are a result of the two scenes being captured a year apart." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f306d91f-626f-4814-b6a5-25b379aafeaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(12, 8))\n", + "\n", + "hist, bins = skimage.exposure.histogram(harmonized.isel(time=0, band=1).data.compute())\n", + "ax.plot(bins, hist, label=\"Old\")\n", + "\n", + "hist, bins = skimage.exposure.histogram(harmonized.isel(time=1, band=1).data.compute())\n", + "ax.plot(bins, hist, label=\"New\")\n", + "plt.legend()\n", + "sns.despine()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}