In [1]:
import planetary_computer
from pystac_client import Client
import stackstac
import xarray as xr
import rioxarray as rio
import numpy as np
import pandas as pd
import geopandas as gpd
import os

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import rioxarray as rio
from rasterio.features import rasterize
from rasterio.enums import Resampling

from shapely import Point
import math as math

import pandas as pd
from pathlib import Path
import folium

# Project root: the directory the notebook is running from; data paths below are built from it.
root = Path.cwd()

In [30]:
# Read the Ordway site boundary from the project's data directory.
b = gpd.read_file(root / 'data' / 'ordway' / 'ordway_boundary.gpkg')

# Reproject to EPSG:26917; later cells take both the CRS and the bounds from `boundary`.
boundary = b.to_crs(epsg=26917)

#b.explore()

In [50]:
# Show the EPSG code of the reprojected boundary.
boundary.crs.to_epsg()

26917

In [None]:
def map_sentinel_tiles(boundary):
    """Draw the Sentinel-2 grid tiles that touch the AOI outline on a folium map.

    Parameters
    ----------
    boundary : geopandas.GeoDataFrame
        Area of interest. Only the outline of its first geometry is used for
        the tile lookup; the whole outline is drawn on the map.

    Returns
    -------
    folium.Map
        Map centred on the AOI showing the intersecting grid tiles (tooltip:
        the tile ``id`` field) and the AOI outline.
    """
    s2_grid_url = "https://unpkg.com/sentinel-2-grid/data/grid.json"
    grid_gdf = gpd.read_file(s2_grid_url)

    # find tiles that intersect with the AOI outline
    bb = boundary.boundary.to_crs(grid_gdf.crs)
    tiles = grid_gdf[grid_gdf.intersects(bb.geometry.iloc[0])]
    exploded = tiles.explode()  # separate geometry collections into single geometries

    # centre the map on the AOI instead of a fixed location; folium wants (lat, lon)
    minx, miny, maxx, maxy = boundary.to_crs(4326).total_bounds
    centre = ((miny + maxy) / 2, (minx + maxx) / 2)
    m = folium.Map(location=centre, zoom_start=5, tiles='OpenStreetMap')

    folium.GeoJson(
        exploded,
        tooltip=folium.GeoJsonTooltip(fields=["id"])
    ).add_to(m)

    folium.GeoJson(bb).add_to(m)

    # return the map itself (a trailing comma here would wrap it in a 1-tuple)
    return m

# Build the tile map for the site boundary.
m = map_sentinel_tiles(boundary)

# Bare expression so the notebook renders the result as the cell output.
m

In [43]:
def get_sentinel_tiles(boundary):
    """List the names of the Sentinel-2 grid tiles crossing the AOI outline.

    The grid is downloaded from the public sentinel-2-grid GeoJSON. Only the
    outline of the first geometry in ``boundary`` is tested for intersection.

    Parameters
    ----------
    boundary : geopandas.GeoDataFrame
        Area of interest.

    Returns
    -------
    list
        Values of the grid's ``name`` column for every intersecting tile.
    """
    grid = gpd.read_file("https://unpkg.com/sentinel-2-grid/data/grid.json")

    # bring the AOI outline into the grid's CRS before the spatial test
    outline = boundary.boundary.to_crs(grid.crs)
    first_outline = outline.geometry.iloc[0]
    touches_aoi = grid.intersects(first_outline)

    return list(grid[touches_aoi]['name'])

In [3]:
# Exploratory search: list every matching scene with its processing baseline
# and generation time, and keep the item collections per year in `items_dict`.
years = [2022, 2023]
epsg = 26917

bbox_4326 = tuple(boundary.to_crs(4326).total_bounds)
bbox_utm = tuple(boundary.total_bounds)

catalog = Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

# only scenes with less than 20 % cloud cover
# (optional extra filter: "s2:processing_baseline": {"eq": "04.00"})
query = {"eo:cloud_cover": {"lt": 20}}

timesteps = []
items_dict = {}

for year in years:
    items = catalog.search(
        bbox=bbox_4326,
        collections=["sentinel-2-l2a"],
        datetime=f"{year}-07-01/{year}-09-01",
        query=query,
    ).item_collection()
    print(f'{year}: number of sentinel images found: {len(items)}')
    items_dict[year] = items
    for item in items:
        # descriptive names so the loop does not overwrite the `b` GeoDataFrame
        # loaded in an earlier cell
        baseline = item.properties.get('s2:processing_baseline')
        generated = item.properties.get('s2:generation_time')
        # outer double quotes keep this f-string valid before Python 3.12
        print(f"{item.properties.get('datetime')}\ngenerated: {generated} baseline: {baseline}")

2022: number of sentinel images found: 6
2022-08-01T15:59:11.024000Z
generated: 2024-07-13T00:33:31.000000Z baseline: 05.10
2022-08-01T15:59:11.024000Z
generated: 2022-08-02T13:41:09.840840Z baseline: 04.00
2022-08-01T15:59:11.024000Z
generated: 2024-07-13T00:33:31.000000Z baseline: 05.10
2022-08-01T15:59:11.024000Z
generated: 2022-08-02T13:31:08.544681Z baseline: 04.00
2022-07-22T15:59:11.024000Z
generated: 2024-07-11T01:26:39.000000Z baseline: 05.10
2022-07-22T15:59:11.024000Z
generated: 2022-07-24T20:57:55.953305Z baseline: 04.00
2023: number of sentinel images found: 10
2023-08-31T15:58:29.024000Z
generated: 2024-10-27T19:03:09.000000Z baseline: 05.10
2023-08-31T15:58:29.024000Z
generated: 2023-08-31T23:14:15.128965Z baseline: 05.09
2023-08-31T15:58:29.024000Z
generated: 2024-10-27T19:03:09.000000Z baseline: 05.10
2023-08-31T15:58:29.024000Z
generated: 2023-08-31T22:16:51.50886Z baseline: 05.09
2023-08-21T15:58:29.024000Z
generated: 2024-10-24T13:17:10.000000Z baseline: 05.10
2023-

In [None]:
# Keep only the 2023 items carrying the highest (most recent) processing baseline.
items23 = items_dict[2023]

# iterate `items23` explicitly rather than whatever `items` the search loop left behind
baselines = [float(item.properties.get('s2:processing_baseline')) for item in items23]
highest = np.max(baselines)
selected_items = [
    item for item in items23
    if float(item.properties.get('s2:processing_baseline')) == highest
]

# baselines below 4.0 are flagged for the offset handling used when scaling
apply_offset = bool(highest < 4.0)




In [38]:
# Inspect the MGRS tile id recorded on the first item currently held in `items`.
items[0].properties.get('s2:mgrs_tile')

'17RLP'

In [None]:
# Names of the Sentinel-2 grid tiles crossing the site boundary; used below to filter the STAC search.
tiles = get_sentinel_tiles(boundary)

In [None]:
# Years to assemble into a summer time series. For each year,
# build_summer_timeseries searches 15 June - 15 August and keeps the
# time step with the most valid (unmasked) pixels.
years = [2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]


def build_summer_timeseries(boundary, years, tiles=None):
    """Assemble one masked Sentinel-2 L2A scene per year into a time series.

    For each year the Planetary Computer STAC API is searched for scenes
    between 15 June and 15 August with less than 20 % cloud cover. Only items
    with the highest processing baseline are kept, same-day acquisitions are
    merged, pixels are masked using the SCL band, and the date with the most
    valid pixels is selected. Years with no matching scenes are skipped.

    Parameters
    ----------
    boundary : geopandas.GeoDataFrame
        Area of interest. Its CRS and bounds define the output grid
        (resolution of 10 in the boundary's CRS units) and its geometry is
        used to clip the imagery.
    years : sequence of int
        Years to process, in the order they should appear in the result.
    tiles : iterable of str, optional
        MGRS tile names to restrict the search to. When omitted, the tiles
        returned by ``get_sentinel_tiles(boundary)`` are used.

    Returns
    -------
    xarray.DataArray
        The selected scenes concatenated along ``time``, without the SCL band,
        divided by 10000 and clipped at 0.

    Raises
    ------
    ValueError
        If ``years`` is empty, or if no year yields any scene.
    """
    # fail fast before any network access
    if len(years) == 0:
        raise ValueError('years is empty - nothing to assemble into a time series')

    # derive the tile filter from the AOI instead of relying on a notebook global
    if tiles is None:
        tiles = get_sentinel_tiles(boundary)

    epsg = boundary.crs.to_epsg()
    bbox_4326 = tuple(boundary.to_crs(4326).total_bounds)
    bbox_utm = tuple(boundary.total_bounds)

    catalog = Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        modifier=planetary_computer.sign_inplace,
    )

    query = {"eo:cloud_cover": {"lt": 20},
             "s2:mgrs_tile": {'in': list(tiles)}}

    timesteps = []

    for year in years:
        items = catalog.search(
            bbox=bbox_4326,
            collections=["sentinel-2-l2a"],
            datetime=f"{year}-06-15/{year}-08-15",
            query=query,
        ).item_collection()
        print(f'{year}: number of images found: {len(items)}')
        if len(items) == 0:
            print(f'SKIPPING {year}')
            continue

        # keep only the images with the highest (i.e. most recent) processing baseline
        baselines = [float(i.properties.get('s2:processing_baseline')) for i in items]
        highest = np.max(baselines)
        selected_items = [i for i in items if float(i.properties.get('s2:processing_baseline')) == highest]

        # baselines below 4.0 get the +1000 shift applied before scaling.
        # NOTE(review): this raises pre-04.00 digital numbers to match newer
        # baselines rather than subtracting 1000 from the newer ones; confirm
        # the intended harmonisation direction against the Sentinel-2
        # BOA_ADD_OFFSET documentation.
        apply_offset = bool(highest < 4.0)
        if apply_offset:
            print(f'processing baseline: {highest} - applying offset')
        else:
            print(f'processing baseline: {highest}')

        # create xarray
        # NOTE(review): bilinear resampling is applied to every asset, including
        # the categorical SCL band - confirm that is acceptable for the mask below.
        stack = stackstac.stack(
            selected_items,
            epsg=epsg,
            resolution=10,
            bounds=bbox_utm,
            assets=['B02','B03','B04','B05','B06','B07','B08','B8A','B11','B12','SCL'],
            resampling=Resampling.bilinear)

        # crop to boundary
        stack = stack.rio.clip(geometries=boundary.geometry)

        # truncate timestamps to the day so same-day acquisitions share a label
        stack = stack.assign_coords(time=stack['time'].dt.floor('D'))

        # merge images with the same (daily) timestamp
        mosaic = stack.groupby('time').max(dim='time', skipna=True)

        mosaic = mosaic.drop_attrs()
        mosaic = mosaic.reset_coords(drop=True)
        mosaic = mosaic.astype('float32')

        # drop pixels whose SCL class is in the excluded list
        # (presumably saturated/defective, cloud shadow, water, clouds, cirrus
        # and snow per the SCL legend - confirm)
        scl = mosaic.sel(band='SCL')
        mask = ~scl.isin([1, 3, 6, 8, 9, 10, 11])
        masked = mosaic.where(mask).drop_sel(band='SCL')

        # count non-NaN pixels per time step on the first band
        sample_band = masked.isel(band=0)
        valid_pixel_count = sample_band.count(dim=['y','x']).values

        v = valid_pixel_count.argmax()  # index of timestep with most valid pixels
        masked_lc = masked.isel(time=v)

        if apply_offset:
            scaled = (masked_lc + 1000) / 10000
        else:
            scaled = masked_lc / 10000

        scaled = scaled.clip(min=0)

        timesteps.append(scaled)

    # every requested year was skipped: raise a clear error rather than an opaque concat failure
    if not timesteps:
        raise ValueError('no Sentinel-2 scenes found for any of the requested years')

    ts = xr.concat(timesteps, dim='time')

    return ts


2016: number of sentinel images found: 3
processing baseline: 2.12 - applying offset
2017: number of sentinel images found: 1
processing baseline: 2.12 - applying offset
2018: number of sentinel images found: 4
processing baseline: 2.12 - applying offset
2019: number of sentinel images found: 4
processing baseline: 2.12 - applying offset
2020: number of sentinel images found: 6
processing baseline: 2.12 - applying offset
2021: number of sentinel images found: 3
processing baseline: 3.0 - applying offset
2022: number of sentinel images found: 6
processing baseline: 5.1
2023: number of sentinel images found: 4
processing baseline: 5.1
2024: number of sentinel images found: 0
SKIPPING 2024
2025: number of sentinel images found: 6
processing baseline: 5.11


In [49]:
# Display the assembled time series.
# NOTE(review): `ts` is not assigned at top level in the visible cells; presumably it
# comes from `ts = build_summer_timeseries(boundary, years)` run elsewhere - confirm.
ts

Unnamed: 0,Array,Chunk
Bytes,249.68 MiB,2.77 MiB
Shape,"(9, 10, 782, 930)","(1, 1, 782, 930)"
Dask graph,90 chunks in 245 graph layers,90 chunks in 245 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 249.68 MiB 2.77 MiB Shape (9, 10, 782, 930) (1, 1, 782, 930) Dask graph 90 chunks in 245 graph layers Data type float32 numpy.ndarray",9  1  930  782  10,

Unnamed: 0,Array,Chunk
Bytes,249.68 MiB,2.77 MiB
Shape,"(9, 10, 782, 930)","(1, 1, 782, 930)"
Dask graph,90 chunks in 245 graph layers,90 chunks in 245 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
class SentinelDownloader:
    """Fetch and pre-process Sentinel-2 L2A imagery for one site and year.

    The site boundary is read from
    ``<root>/data/<site_name>/<site_name>_boundary.gpkg`` and reprojected to
    ``epsg``. Zarr stores are written to and read from
    ``<root>/sentinel_data/<site_name>``.
    """

    def __init__(self, root, year, site_name, epsg):
        """Load the site boundary and store the run configuration.

        Parameters
        ----------
        root : pathlib.Path
            Project root; combined with sub-paths using the ``/`` operator.
        year : int
            Year to search imagery for.
        site_name : str
            Site identifier used in directory and file names.
        epsg : int
            EPSG code the boundary and the imagery are projected to.
        """
        self.root = root
        self.site_name = site_name
        os.makedirs(self.root / 'data' / self.site_name, exist_ok=True)
        self.epsg = epsg
        self.year = year
        boundary = gpd.read_file(self.root / 'data' / self.site_name / f'{self.site_name}_boundary.gpkg')
        self.boundary = boundary.to_crs(epsg)

        # total boundary area divided by 100; presumably the number of 10 m
        # pixels covering the site, assuming CRS units are metres - confirm
        self.pixel_threshold = int((self.boundary.geometry.area.sum()) / 100)

    @staticmethod
    def _search_window(year, month1, month2):
        """Return the STAC datetime range from the 1st of month1 to the 1st of month2.

        Months may be ints or numeric strings; they are zero-padded so the
        result is always of the form ``YYYY-MM-01/YYYY-MM-01``.
        """
        return f"{year}-{int(month1):02d}-01/{year}-{int(month2):02d}-01"

    def map_sentinel_tiles(self):
        """Return a folium map of the Sentinel-2 grid tiles intersecting the site.

        Only the first boundary geometry is used for the intersection test.
        The map is centred on the site boundary.
        """
        s2_grid_url = "https://unpkg.com/sentinel-2-grid/data/grid.json"
        grid_gdf = gpd.read_file(s2_grid_url)

        # find tiles that intersect with AOI
        bb = self.boundary.to_crs(grid_gdf.crs)
        intersecting_tiles = grid_gdf[grid_gdf.intersects(bb.geometry.iloc[0])]
        exploded = intersecting_tiles.explode()  # separate geometry collections into single geometries

        # centre the map on the AOI instead of a fixed location; folium wants (lat, lon)
        minx, miny, maxx, maxy = self.boundary.to_crs(4326).total_bounds
        m = folium.Map(location=((miny + maxy) / 2, (minx + maxx) / 2), zoom_start=5)

        folium.GeoJson(
            exploded,
            tooltip=folium.GeoJsonTooltip(fields=["id"])
        ).add_to(m)

        folium.GeoJson(bb).add_to(m)

        return m

    def write_raw_data_to_disk(self, month_name, month1, month2, timechunk=4, bandchunk=4):
        """Search, stack, clip and mosaic imagery, then write it to a zarr store.

        Parameters
        ----------
        month_name : str
            Label used in the output file name
            (``<year>_raw_data_<month_name>.zarr``).
        month1, month2 : int or str
            Start and end month numbers; the search window runs from the first
            day of ``month1`` to the first day of ``month2`` of ``self.year``.
        timechunk, bandchunk : int, optional
            Dask chunk sizes along ``time`` and ``band`` for the raw stack.

        Raises
        ------
        ValueError
            If the search returns no items.
        """
        bbox_4326 = tuple(self.boundary.to_crs(4326).total_bounds)
        bbox_utm = tuple(self.boundary.total_bounds)

        catalog = Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1",
            modifier=planetary_computer.sign_inplace,
        )

        # less than 20 % cloud cover and processing baseline exactly 04.00
        query = {"eo:cloud_cover": {"lt": 20},
                 "s2:processing_baseline": {"eq": "04.00"}}

        items = catalog.search(
            bbox=bbox_4326,
            collections=["sentinel-2-l2a"],
            datetime=self._search_window(self.year, month1, month2),
            query=query,
        ).item_collection()
        print(f'number of sentinel images found: {len(items)}')

        # nothing to stack: stop with a clear message instead of failing further down
        if len(items) == 0:
            raise ValueError(
                f'no Sentinel-2 items found for {self.site_name} in '
                f'{self._search_window(self.year, month1, month2)}'
            )

        # create xarray
        stack = stackstac.stack(
            items,
            epsg=self.epsg,
            resolution=10,
            bounds=bbox_utm,
            assets=['B02','B03','B04','B05','B06','B07','B08','B8A','B11','B12','SCL'],
            resampling=Resampling.bilinear)

        stack = stack.chunk({'time': timechunk, 'band': bandchunk, 'y': None, 'x': None})

        # crop to boundary
        stack = stack.rio.clip(geometries=self.boundary.geometry)

        # truncate timestamps to the day so same-day acquisitions share a label
        stack = stack.assign_coords(time=stack['time'].dt.floor('D'))

        # merge images with the same (daily) timestamp
        mosaic = stack.groupby('time').max(dim='time', skipna=True)

        mosaic = mosaic.drop_attrs()
        mosaic = mosaic.reset_coords(drop=True)

        mosaic = mosaic.chunk({'time': None, 'band': None, 'y': 1024, 'x': 1024})

        mosaic = mosaic.astype('float32')

        mosaic.to_zarr(self.root / 'sentinel_data' / self.site_name / f'{self.year}_raw_data_{month_name}.zarr')

    def clean_and_scale_data(self, month_name):
        """Mask, select and scale the raw zarr written by ``write_raw_data_to_disk``.

        Opens ``<year>_raw_data_<month_name>.zarr``, masks pixels by SCL class,
        keeps the time step with the most valid pixels, divides by 10000,
        clips at 0 and writes ``<year>_clean_data_<month_name>.zarr``.

        Parameters
        ----------
        month_name : str
            Label identifying the raw store to read and the clean store to write.
        """
        a = xr.open_zarr(self.root / 'sentinel_data' / self.site_name / f'{self.year}_raw_data_{month_name}.zarr')
        # the store holds a single data variable; take it whatever its name is
        var_name = list(a.data_vars.keys())[0]
        month = a[var_name]

        # drop pixels whose SCL class is in the excluded list
        # (presumably saturated/defective, cloud shadow, water, clouds, cirrus
        # and snow per the SCL legend - confirm)
        scl = month.sel(band='SCL')
        mask = ~scl.isin([1, 3, 6, 8, 9, 10, 11]).persist()

        masked = month.where(mask).drop_sel(band='SCL')

        # count non-NaN pixels per time step on the first band, by dimension name
        sample_band = masked.isel(band=0)
        valid_pixel_count = sample_band.count(dim=['y', 'x']).values

        v = valid_pixel_count.argmax()  # index of timestep with most valid pixels

        masked_lc = masked.isel(time=v)

        # NOTE(review): non-positive values are not masked here; if zero-valued
        # nodata pixels should be excluded, mask them before the selection - confirm.
        scaled = masked_lc / 10000
        scaled = scaled.clip(min=0)

        scaled = scaled.chunk({'band': -1, 'y': None, 'x': None})

        scaled.to_zarr(self.root / 'sentinel_data' / self.site_name / f'{self.year}_clean_data_{month_name}.zarr')