# How to exploit data on Pangeo

#### Pangeo Workshop - Snow and Cloud Cover
converted from https://github.com/EO-College/cubes-and-clouds/blob/main/lectures/3.1_data_processing/exercises/_alternatives/31_data_processing_stac.ipynb

author: Michele Clous @mclous
conversion by: Pangeo volunteers (Pier Lorenzo Marasco @pl-marasco, Alejandro Coca-Castro @, Justus Magin @ , Tina Odaka @tinaodaka, Anne Fouilloux @annefou)

#### Introduction
In this exercise, we will build the same complete EO workflow as in the openEO exercise, using cloud-provided data (STAC Catalogue) and processing it locally, from data access to obtaining the result.

We are going to follow these steps in our analysis:

- Load satellite collections
- Specify the spatial, temporal extents and the features we are interested in
- Process the satellite data to retrieve snow cover information
- Aggregate information in data cubes
- Visualize and analyse the results

#### Important information

More information on Pangeo can be found here: https://pangeo.io/
More information on the STAC specification can be found here: https://stacspec.org/


#### Import libraries

In [None]:
# Data Manipulation and Analysis Libraries
import pandas as pd  
import numpy as np 

# Geospatial Data Handling Libraries
import geopandas as gpd 
from shapely.geometry import mapping  
import pyproj

# Multidimensional and Satellite Data Libraries
import xarray as xr 
import rioxarray as rio
import stackstac

# Data Visualization Libraries
import holoviews as hv
import hvplot.xarray
import hvplot.pandas

# Data parallelization and distributed computing libraries
import dask
from dask.distributed import Client, progress, LocalCluster

# STAC Catalogue Libraries
import pystac_client

In [None]:
# Start a Dask cluster on the local machine, then attach a client to it so
# that subsequent Dask computations are scheduled on that cluster.
cluster = LocalCluster()
client = Client(address=cluster)

In [None]:
# Load the catchment outline (area of interest). The CRS is read from the
# file itself and the result is then normalised to WGS84 (EPSG:4326), the
# CRS used by GeoJSON geometries. The previous code passed a misspelt
# authority code ("EPGS:4326") as a read option instead.
aoi = gpd.read_file('data/catchment_outline.geojson')
aoi = aoi.set_crs("EPSG:4326") if aoi.crs is None else aoi.to_crs("EPSG:4326")
# GeoJSON-like dict of the first feature's geometry, used as search footprint.
aoi_geojson = mapping(aoi.iloc[0].geometry)

In [None]:
# Open the Earth Search STAC API and query the Sentinel-2 L2A collection for
# every item intersecting the area of interest within the requested period.
URL = "https://earth-search.aws.element84.com/v1"
catalog = pystac_client.Client.open(URL)
search = catalog.search(
    collections=["sentinel-2-l2a"],
    intersects=aoi_geojson,
    datetime="2019-02-01/2019-06-10",
)
items = search.item_collection()
# Number of items returned by the search.
len(items)

In [None]:
# Get bands information
# selected_item = items[1]
# for key, asset in selected_item.assets.items():
#     print(f"{key}: {asset.title}")

In [None]:
# Assemble all search results into a single data cube.
ds = stackstac.stack(items=items)

In [None]:
# Pull out the three bands the workflow needs: green and SWIR (for the NDSI)
# and the scene classification layer (for masking).
green, swir, scl = (
    ds.sel(band=band_name) for band_name in ('green', 'swir16', 'scl')
)

In [None]:
# Normalised difference of the green and SWIR bands:
# NDSI = (green - SWIR) / (green + SWIR)
band_difference = green - swir
band_sum = green + swir
ndsi = band_difference / band_sum

In [None]:
# ndsi = ndsi.chunk(chunks={'time':1, 'x': 2048, 'y': 2048})

In [None]:
# Binarise the NDSI with the 0.42 threshold: values above it become 1,
# values at or below it become 0, and NaN pixels stay NaN.
ndsi_is_valid = ~np.isnan(ndsi)
snow = xr.where((ndsi > 0.42) & ndsi_is_valid, 1, ndsi)
snow_is_valid = ~np.isnan(snow)
snowmap = xr.where((snow <= 0.42) & snow_is_valid, 0, snow)
# Pixels whose scene classification value is 3, 8 or 9 (presumably the
# cloud-shadow / cloud classes — confirm against the SCL legend) are set to 2;
# every other pixel keeps its snow map value.
mask = ~scl.isin([8, 9, 3])
snow_cloud = xr.where(mask, snowmap, 2)

In [None]:
# Reproject the catchment outline to EPSG:32632 and keep the geometry of its
# first feature for clipping the raster data.
aoi_utm32 = aoi.to_crs("EPSG:32632")
geom_utm32 = aoi_utm32.iloc[0].geometry

In [None]:
# Attach the CRS (EPSG:32632) and declare NaN as the nodata value, both
# in place, so that rioxarray can clip the array afterwards.
snow_cloud.rio.write_crs(input_crs="EPSG:32632", inplace=True)
snow_cloud.rio.set_nodata(input_nodata=np.nan, inplace=True)

In [None]:
# Clip the snow/cloud cube to the catchment geometry.
clip_geometries = [geom_utm32]
snowmap_clipped = snow_cloud.rio.clip(geometries=clip_geometries)

In [None]:
# from dask.diagnostics import ProgressBar
# with ProgressBar():
#     clipped_date = snowmap_clipped.compute()

In [None]:
# Start the computation on the Dask cluster and keep the result there.
# `.persist()` returns a Dask-backed array whose chunks are futures, so
# `progress` has something to track and later cells reuse the data instead
# of re-running the whole graph. The previous `.compute()` gathered the
# result into a local copy that later cells never used.
snowmap_clipped = snowmap_clipped.persist()
clipped_date = snowmap_clipped

In [None]:
# Show a progress bar for the Dask work behind `clipped_date`.
# NOTE(review): `progress` tracks outstanding futures, so it only reports
# live progress when `clipped_date` is still backed by futures (e.g. the
# result of `.persist()`), not when it is already an in-memory result.
progress(clipped_date)

In [None]:
# Merge all acquisitions of the same calendar day: timestamps are floored to
# the day and the per-pixel maximum (ignoring NaN) is kept for each day.
acquisition_day = snowmap_clipped.time.dt.floor('D')
clipped_date = snowmap_clipped.groupby(acquisition_day).max(skipna=True)

In [None]:
# The group-by leaves the daily dimension named 'floor'; call it 'date'.
clipped_date = clipped_date.rename(floor='date')

In [None]:
# Display the daily cube to inspect its dimensions and coordinates.
clipped_date

In [None]:
# Interactive map of the daily classification drawn over OSM tiles, with one
# frame per value of the 'date' dimension.
snowmap_plot_options = dict(
    x='x',
    y='y',
    groupby='date',
    crs=pyproj.CRS.from_epsg(32632),
    cmap='Pastel2',
    clim=(-1, 2),
    frame_width=500,
    frame_height=500,
    title='Snowmap',
    geo=True,
    tiles='OSM',
)
clipped_date.hvplot.image(**snowmap_plot_options)

In [None]:
# Per-date number of pixels whose class value is 2: matching pixels are set
# to 1, everything else to NaN, and `count` ignores the NaN entries.
cloud_pixels = xr.where(clipped_date == 2, 1, np.nan)
cloud = cloud_pixels.count(dim=['x', 'y'])

In [None]:
# Per-date number of non-NaN pixels; used as the denominator of the cover
# fractions.
spatial_dims = ['x', 'y']
aot_total = clipped_date.count(dim=spatial_dims)

In [None]:
# Share of pixels with class value 2 among all non-NaN pixels, in percent.
cloud_fraction = (cloud / aot_total) * 100

In [None]:
# Plot the cloud cover time series with a dashed red reference line at 25 %.
# The y-axis label is "%" (it was previously the typo "&").
cloud_fraction.hvplot.line(title='Cloud cover %', ylabel="%") * hv.HLine(25).opts(
    color='red',
    line_dash='dashed',
    line_width=2.0,
)

In [None]:
# Per-date number of pixels whose class value is 1: matching pixels are set
# to 1, everything else to NaN, and `count` ignores the NaN entries.
snow_pixels = xr.where(clipped_date == 1, 1, np.nan)
snow = snow_pixels.count(dim=['x', 'y'])

In [None]:
# Share of pixels with class value 1 among all non-NaN pixels, in percent.
snow_fraction = (snow / aot_total) * 100

In [None]:
# Time series of the snow-covered share, in percent.
snow_fraction.hvplot.line(
    title='Snow cover area (%)',
    ylabel="%",
)

In [None]:
# Boolean mask of the dates considered cloud-free enough to use. The cut-off
# is 25 %, matching the threshold line drawn on the cloud cover plot (the
# filter previously used 30 %, which disagreed with that line).
# NOTE(review): confirm 25 % is the intended threshold.
masked_cloud_fraction = cloud_fraction < 25

In [None]:
# Keep only the dates for which the cloud mask is True.
snow_selected = snow_fraction.sel({'date': masked_cloud_fraction})

In [None]:
# Name the series 'SCA' (presumably "snow cover area"); the name is attached
# to the DataArray itself.
snow_selected.name = 'SCA'

In [None]:
# Plot the snow fraction for the dates that passed the cloud filter.
snow_selected.hvplot.line(title="Snow fraction")

In [None]:
# Read the discharge measurements, using the 'Time' column as a parsed
# datetime index.
discharge_ds = pd.read_csv(
    'data/ADO_DSC_ITH1_0025.csv',
    sep=',',
    index_col='Time',
    parse_dates=True,
)

In [None]:
# Preview the first rows of the discharge table.
discharge_ds.head()

In [None]:
# Restrict the discharge record to the study period (label-based slice on the
# datetime index, both ends inclusive).
start_date, end_date = (
    pd.to_datetime(day) for day in ("2019/02/01", "2019/06/30")
)
discharge_ds = discharge_ds.loc[start_date:end_date]

# Overlay the discharge series and the cloud-filtered snow cover series.
discharge_plot = discharge_ds.discharge_m3_s.hvplot(
    title='Discharge volume',
    ylabel='Discharge (m$^3$/s)',
)
snow_cover_plot = snow_selected.hvplot(ylabel='Snow cover area (%)')
discharge_plot * snow_cover_plot