# Examples of how to work with the Global Coastal Transect System (GCTS)

Run the first few cells to load the required functions, then jump to the section you're interested in.

In [None]:
import logging
import os
import pathlib
import sys

import dask

# Turn off dask's dataframe query planning. This is set before dask_geopandas is
# imported on the next line - presumably because dask_geopandas needs the legacy
# (non query-planning) dataframe backend; TODO confirm for the installed versions.
dask.config.set({"dataframe.query-planning": False})
import dask_geopandas
import duckdb
import geopandas as gpd
import hvplot.pandas
import pandas as pd
import pystac
import shapely
from coastmonitor.geo.geometries import geo_bbox
from dotenv import load_dotenv
from ipyleaflet import Map, basemaps

# Pull variables from a local .env file into the process environment before
# anything is read from it (override=True lets the file win over existing values).
load_dotenv(override=True)

# Connection settings handed to the cloud-storage readers further down.
account_name = "coclico"
sas_token = os.getenv("AZURE_STORAGE_SAS_TOKEN")  # None when the variable is unset
storage_options = dict(account_name=account_name, credential=sas_token)

## Load from STAC catalog

Load the transects from our CoCliCo STAC catalog. 

In [None]:
# Open the root of the CoCliCo STAC catalog from its catalog.json URL.
coclico_catalog = pystac.Catalog.from_file(
    "https://coclico.blob.core.windows.net/stac/v1/catalog.json"
)

In [None]:
# Display the catalog object.
coclico_catalog

In [None]:
# List every collection that can be reached from the catalog.
list(coclico_catalog.get_all_collections())

In [None]:
# Fetch the catalog child with id "gcts" (the transect collection) and display it.
gcts = coclico_catalog.get_child("gcts")
gcts

### Use a dynamic map to extract data by region of interest

The ipyleaflet map below can be used to find the bounding-box coordinates of a region.
Zoom to the area where you want to extract data and then run the next cell. Wait about a
second before running it: the map has to finish rendering before its coordinates can be
extracted.

In [None]:
# Interactive satellite-imagery map used to pick the region of interest;
# it opens on a fixed start location and zoom level.
m = Map(
    basemap=basemaps.Esri.WorldImagery,
    center=(41.735966575868716, -70.10032653808595),
    zoom=9,
    scroll_wheel_zoom=True,
)
m.layout.height = "800px"
m  # last expression of the cell, so the map widget is rendered

In [None]:
# Read the bounds of the map view rendered above and turn them into a
# GeoDataFrame holding the region of interest. The map must have finished
# rendering before this cell runs, otherwise its bounds are not available yet.
west, south, east, north = m.west, m.south, m.east, m.north
# `roi` is consumed by the spatial joins in later cells. The bounds are passed
# in (west, south, east, north) order - presumably the same order geo_bbox
# receives when it is called with the STAC item bboxes; verify against geo_bbox.
roi = geo_bbox(west, south, east, north)

In [None]:
# Collect all items (data partitions) of the GCTS collection into a list, so that
# they can be looked up by position later on.
items = list(gcts.get_all_items())

## The dataset is partitioned into geospatial chunks

The dataset is divided into chunks that each span a different region of the world. In the next cell
we read the spatial extent of each chunk and combine them into a GeoDataFrame.

In [None]:
# Build one bounding-box frame per STAC item and stack them into a single frame
# with a fresh 0..n-1 index (assuming geo_bbox yields one row per item, the row
# label then equals the item's position in `items`).
bboxes = pd.concat(
    [geo_bbox(*item.to_dict()["bbox"]) for item in items],
    ignore_index=True,
)
# Show the partition extents on an interactive map.
bboxes.explore()

## Now we can find the bboxes that cover our region of interest

In [None]:
# Keep the partition extents that intersect the region of interest, restricted
# to the original extent columns (the join appends columns from `roi`).
bboxes_roi = bboxes.sjoin(roi)[bboxes.columns]
# The surviving index labels are used as positions in `items` to select the
# matching STAC items.
items_roi = [items[position] for position in bboxes_roi.index]

In [None]:
# Display the STAC items selected for the region of interest.
items_roi

In [None]:
# Display the first selected item (raises IndexError when no item was selected).
items_roi[0]

## The STAC items contain references to where the data is stored

In [None]:
# Location of the "data" asset of every selected STAC item.
hrefs = [item.assets["data"].href for item in items_roi]

## Cloud based data

The hrefs that you see below are URLs to a cloud bucket with the transects for the area of interest. The prefix "az://" is the protocol for Azure cloud storage.

In [None]:
# Display the list of data locations.
hrefs

## Reading the transect partitions that span our region of interest 

We will read the data from cloud storage - but only the data that spans our region of interest (the extent of the interactive map above). 

## Dask dataframes are lazy

These dataframes are not in memory yet. We still have to trigger the compute (see cell below)

In [None]:
# Open the selected partitions as a dask-geopandas dataframe and display it; the
# result is not assigned, and the rows are not loaded into memory at this point.
dask_geopandas.read_parquet(hrefs, storage_options=storage_options)

## Compute the transects that span our region of interest

The transects are not in memory yet. In the next cell we will trigger the retrieval from cloud storage to the local client by calling `ddf.compute()`. 

In [None]:
%%time
# NOTE(review): make_meta is not used in this cell - check whether a later cell needs it.
from dask.dataframe.utils import make_meta

# Open the partitions that cover the region of interest (still lazy).
transects = dask_geopandas.read_parquet(hrefs, storage_options=storage_options)
# Reproject the region of interest to the CRS of the transects, spatially join,
# drop the "index_right" column left by the join, and only then trigger the
# actual read with .compute().
transects_roi = (
    transects.sjoin(roi.to_crs(transects.crs)).drop(columns=["index_right"]).compute()
)

# Preview the first rows of the in-memory result.
transects_roi.head()