# Download Sentinel-2 data
* Iterate over each region in the supplied `regions.geojson` input 
* For each region, query the Planetary Computer STAC API and load the matching scenes into a data cube
  * Group scenes by solar date and subset to area provided in `regions.geojson`
* Save each scene by solar date by region

Regions (**bolded** regions are the ones selected for this analysis):
* **Gambia-flooding-8-11-2022**
* Hurricane-Fiona-9-19-2022
* Hurricane-Ian-9-26-2022
* **Indonesia-Earthquake22**
* **Kahramanmaras-turkey-earthquake-23**
* New-Zealand-Flooding22
* New-Zealand-Flooding23
* Sudan-flooding-8-22-2022
* **afghanistan-earthquake22**
* **cyclone-emnati22**
* kentucky-flooding-7-29-2022
* pakistan-flooding22
* southafrica-flooding22
* tonga-volcano21
* **volcano-indonesia21**
* yellowstone-flooding22
* **baltimore-nd**

In [5]:
# Standard library imports
import json
import os
from pathlib import Path

# Third-party imports
import boto3
import dask
import dask.distributed
import dask.utils
from datacube.utils.cog import write_cog
from dotenv import load_dotenv
import geopandas as gpd
import numpy as np
from odc.stac import configure_rio, stac_load
import pandas as pd
import planetary_computer as pc
from pystac_client import Client
import rasterio as rio
from rasterio.mask import mask as rio_mask
from rasterio.session import AWSSession
import xarray as xr
from IPython.display import display

# Local imports
from utils import to_float


print("Load environment variables from .env file.")
load_dotenv()

# Credentials are read from the process environment (populated by the .env
# file above). Indexing os.environ directly makes a missing variable fail
# fast with a KeyError instead of surfacing later as an auth error.
USGS_API_KEY = os.environ["USGS_API_KEY"]
USGS_TOKEN_NAME = os.environ["USGS_TOKEN_NAME"]
USGS_USERNAME = os.environ["USGS_USERNAME"]
USGS_PASSWORD = os.environ["USGS_PASSWORD"]
AWS_ACCESS_KEY = os.environ["AWS_ACCESS_KEY"]
AWS_SECRET_KEY = os.environ["AWS_SECRET_KEY"]
NASA_EARTHDATA_S3_ACCESS_KEY = os.environ["NASA_EARTHDATA_S3_ACCESS_KEY"]
NASA_EARTHDATA_S3_SECRET_KEY = os.environ["NASA_EARTHDATA_S3_SECRET_KEY"]
NASA_EARTHDATA_S3_SESSION = os.environ["NASA_EARTHDATA_S3_SESSION"]
NASA_EARTHDATA_USERNAME = os.environ["NASA_EARTHDATA_USERNAME"]
NASA_EARTHDATA_PASSWORD = os.environ["NASA_EARTHDATA_PASSWORD"]

# Root of the project data tree. Set DATA_DIR in the environment (or in the
# .env file) to run on another machine; the original author's path is kept
# as the default so existing setups behave exactly as before.
DATA_DIR = Path(
    os.environ.get(
        "DATA_DIR",
        r"C:\Users\Peter\gh\rasmussen-705.603\data\FinalProject",
    )
)

# Output pixel size handed to stac_load as `resolution`; the cube is loaded
# with crs="utm", so this is expressed in the units of that CRS.
RES = 10
STAC_ENDPOINT = "https://planetarycomputer.microsoft.com/api/stac/v1"
COLLECTIONS = ["sentinel-2-l2a"]
# Bands requested from the catalog.
COLLECTION_BANDS = ["blue", "green", "red", "nir08", "swir16", "swir22", "qa"]
# Band order of the written files; "ndvi" is derived after loading.
OUTPUT_BANDS = ["blue", "green", "red", "nir08", "swir16", "swir22", "ndvi", "qa"]

# GDAL configuration option, set through the process environment.
os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "FALSE"

Load environment variables from .env file.


In [2]:
# Stage directories under DATA_DIR.
raw_dir, interim_dir, processed_dir = (
    DATA_DIR / stage for stage in ("raw", "interim", "processed")
)
region_dir = raw_dir / "regions"

# Destination folder for the per-date COGs; create it (and any missing
# parents) up front so the write loop never has to.
dst_dir = interim_dir / "cogs"
dst_dir.mkdir(parents=True, exist_ok=True)

In [3]:
print("Load input region geojson and config.")

# Band/asset configuration later handed to stac_load as `stac_cfg`.
cfg_path = raw_dir / "cfg.json"
cfg = json.loads(cfg_path.read_text())

# One row per region of interest.
regions = gpd.read_file(raw_dir / "regions.geojson")

# "start/end" interval string used as the `datetime` filter of the search.
s2_start, s2_end = regions["s2_start"], regions["s2_end"]
regions["time_range"] = s2_start + "/" + s2_end

Load input region geojson and config.


In [4]:
print("Instantiate dask client.")

# Default local cluster; the cube built below is chunked, so the actual
# reads and writes run on these workers.
client = dask.distributed.Client()

# Apply odc-stac's cloud-oriented rasterio/GDAL defaults on this client.
configure_rio(client=client, cloud_defaults=True)

display(client)

Instantiate dask client.


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 16,Total memory: 15.93 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:52498,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 16
Started: Just now,Total memory: 15.93 GiB

0,1
Comm: tcp://127.0.0.1:52520,Total threads: 4
Dashboard: http://127.0.0.1:52521/status,Memory: 3.98 GiB
Nanny: tcp://127.0.0.1:52501,
Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-ga1yp29o,Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-ga1yp29o

0,1
Comm: tcp://127.0.0.1:52524,Total threads: 4
Dashboard: http://127.0.0.1:52525/status,Memory: 3.98 GiB
Nanny: tcp://127.0.0.1:52502,
Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-bhzds55u,Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-bhzds55u

0,1
Comm: tcp://127.0.0.1:52517,Total threads: 4
Dashboard: http://127.0.0.1:52518/status,Memory: 3.98 GiB
Nanny: tcp://127.0.0.1:52503,
Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-nc8ltd69,Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-nc8ltd69

0,1
Comm: tcp://127.0.0.1:52523,Total threads: 4
Dashboard: http://127.0.0.1:52526/status,Memory: 3.98 GiB
Nanny: tcp://127.0.0.1:52504,
Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-yv1yzh8l,Local directory: C:\Users\Peter\AppData\Local\Temp\dask-worker-space\worker-yv1yzh8l


In [6]:
# Download loop: for every region, search the STAC catalog, load the matching
# scenes lazily into a cube, derive NDVI and a binary QA mask, then write one
# COG per solar day into dst_dir.

# Reproject once up front. The STAC search and the stac_load `bbox` argument
# both take lon/lat bounds, so the bounds must come from the EPSG:4326 copy
# (previously only the geometry was reprojected, and the whole table was
# reprojected and serialised again on every iteration).
regions_4326 = regions.to_crs(4326)
features_4326 = json.loads(regions_4326.to_json())["features"]
bounds_4326 = regions_4326.bounds

# The catalog handle does not depend on the region, so open it once.
catalog = Client.open(STAC_ENDPOINT)

for position, index in enumerate(regions_4326.index.values):
    # Positional access keeps row, feature and bounds aligned even if the
    # index labels are not 0..n-1.
    region = regions_4326.iloc[position]
    event_key = region["event_key"]
    time_range = region["time_range"]
    geom_4326 = features_4326[position]["geometry"]
    maxar_event = region["maxar_event"]
    bbox_4326 = bounds_4326.iloc[position].values.tolist()
    print(f"[{event_key}]: {time_range}.")

    print(f"[{event_key}]: Search catalog.")
    query = catalog.search(
        collections=COLLECTIONS,
        datetime=time_range,
        bbox=bbox_4326,
    )

    items = list(query.get_items())
    print(f"[{event_key}]: Found {len(items):d} items.")
    if not items:
        # Nothing to load for this region; move on instead of letting the
        # loader fail on an empty item list.
        print(f"[{event_key}]: No items found, skipping region.")
        continue

    print(f"[{event_key}]: Load items into data cube.")
    xx = stac_load(
        items,
        bands=COLLECTION_BANDS,
        resolution=RES,
        chunks={"x": 1028, "y": 1028},
        # One time step per solar day: output files are named by date, so
        # same-day acquisitions must be merged rather than collide on the
        # same filename.
        groupby="solar_day",
        stac_cfg=cfg,
        patch_url=pc.sign,
        crs="utm",
        bbox=bbox_4326,
        fail_on_error=False,
    )
    nir08 = to_float(xx.nir08)
    red = to_float(xx.red)
    # NOTE(review): a normalised difference of non-negative inputs lies in
    # [-1, 1], so .round() leaves only -1/0/1 before the int32 cast below.
    # Confirm whether a scale factor was intended; left unchanged so existing
    # consumers of the files keep seeing the same value range.
    ndvi = ((nir08 - red) / (nir08 + red)).fillna(0).round()
    xx["ndvi"] = ndvi
    # Collapse the QA band to a 0/1 flag.
    xx["qa"] = (xx["qa"] > 0).astype("uint8")

    print(f"[{event_key}]: Re-order bands.")
    xx = xx[OUTPUT_BANDS].astype(np.int32)
    dates = xx.time.dt.strftime("%Y-%m-%d").values
    n_files = len(dates)
    print(f"[{event_key}]: Write {n_files} TIFs.")

    for i, date in enumerate(dates):
        dst = dst_dir / f"{event_key}_{date}.tif"
        if dst.exists():
            # write_cog(overwrite=False) would refuse this file anyway, but
            # only after the scene had been fetched and computed. Checking
            # first makes re-runs cheap and resumable.
            print(f"[{event_key}]: Skip {dst.name} (already exists).")
            continue
        try:
            arr = xx.isel(time=i).to_array()
            write_cog(geo_im=arr, fname=dst, overwrite=False).compute()
            print(f"[{event_key}]: Wrote {dst.name}.")
        except Exception as e:
            # Best effort: one bad scene must not abort the whole download.
            print(f"[{event_key}]: Failed to write {dst.name}.")
            print(f"[{event_key}]: {e}")
    


[af-kharkamar-2022]: 2017-01-01/2023-04-29.
[af-kharkamar-2022]: Search catalog.
[af-kharkamar-2022]: Found 407 items.
[af-kharkamar-2022]: Load items into data cube.
[af-kharkamar-2022]: Re-order bands.
[af-kharkamar-2022]: Write 401 TIFs.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-01-29.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-02-08.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-02-28.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-03-20.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-04-09.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-04-29.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-05-19.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-06-08.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-06-28.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-07-13.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-07-18.tif.
[af-kharkamar-2022]: Wrote af-kharkamar-2022_2017-08-02.tif.
[af-kharkamar-2022]: Wrote 