### Creating a test One Concern STAC catalog 

In [None]:
from pathlib import Path
import pystac
from pystac.extensions.projection import AssetProjectionExtension
import fsspec
import xarray as xr
import rasterio
import pandas as pd
import json

In [None]:
def get_bbox(ds):
    """Return the geographic bounding box of a dataset.

    Args:
        ds: Object exposing ``longitude`` and ``latitude`` members whose
            ``.values`` are NumPy arrays (e.g. an ``xarray.Dataset``).

    Returns:
        tuple[float, float, float, float]: ``(min_lon, min_lat, max_lon,
        max_lat)`` as built-in Python floats.
    """
    lon = ds.longitude.values
    lat = ds.latitude.values
    # NumPy scalars such as float32 are not JSON serializable, and this bbox
    # ends up in STAC JSON documents, so cast to built-in floats here.
    return float(lon.min()), float(lat.min()), float(lon.max()), float(lat.max())

In [None]:
remote_path = "gs://sam-temp-dev/kerchunk/jma_hist_rainfall_interpolated_nc_v2_consolidated_20060101_20190930.json"  # JMA
# remote_path = "gs://sam-temp-dev/kerchunk/jwa_historical_reanalysis_rainfall_netcdf_20190901_20201231.json"  # JWA

# Open the kerchunk reference file through fsspec's "reference://" filesystem
# and read it with the zarr engine. The reference JSON itself is fetched
# anonymously from GCS.
reference_storage_options = {
    "fo": remote_path,
    "remote_protocol": "gcs",
    "remote_options": {"anon": True},
}
ds_all = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs={
        "storage_options": reference_storage_options,
        "consolidated": False,
    },
)
ds_all

In [None]:
# # Read in the already created catalog
# catalog = pystac.Catalog.from_file("/Users/slamont/stac/one-concern-stac-catalog/catalog.json")
# catalog.describe()

In [None]:
%%time

# remote_path = "https://storage.cloud.google.com/sam-temp-dev/kerchunk/jma_hist_rainfall_interpolated_nc_v2_consolidated_20060101_20190930.json"  # JMA

# Create the root catalog
catalog = pystac.Catalog(
    id='One Concern STAC catalog',
    description='Various One Concern datasets supporting dev and prod operations',
    stac_extensions=["https://stac-extensions.github.io/projection/v1.0.0/schema.json"],
    href="gs://sam-temp-dev/stac",
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)

# [west, south, east, north] as built-in floats so the values serialize to JSON
bbox = [float(v) for v in get_bbox(ds_all)]
west, south, east, north = bbox
start_time = pd.to_datetime(ds_all.isel(time=0).time.values)
end_time = pd.to_datetime(ds_all.isel(time=-1).time.values)
href = remote_path

# STAC requires the geometry to be a GeoJSON object (or null); an empty list
# is neither. Use the bbox rectangle as the footprint.
footprint = {
    "type": "Polygon",
    "coordinates": [[
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]],
}

# NOTE(review): the id says "jwa" while the title and asset describe JMA data.
# The id is kept because the GCS write paths later in the notebook use it.
# NOTE(review): STAC datetimes must be RFC 3339; the "Z" suffix assumes the
# dataset's time axis is UTC -- confirm (JMA products may be in JST).
item = pystac.Item(
    id="jwa-historical-rainfall",
    geometry=footprint,
    datetime=None,  # allowed because start_datetime/end_datetime are provided
    bbox=bbox,
    properties={
        "institution": "One Concern",
        "title": "JMA Interpolated Historical Rainfall (mm/hr)",
        "start_datetime": start_time.strftime('%Y-%m-%dT%H:00:00Z'),
        "end_datetime": end_time.strftime('%Y-%m-%dT%H:00:00Z'),
    },
    stac_extensions=['https://stac-extensions.github.io/projection/v1.0.0/schema.json'],
    # No self href here: the item's own location is assigned by
    # catalog.normalize_hrefs(); the data file belongs on the asset instead.
)

# Add asset. The href is a kerchunk reference file (JSON), not an HDF5 file.
item.add_asset(
    key="jma",
    asset=pystac.Asset(
        href=remote_path,
        title="rainrate",
        media_type=pystac.MediaType.JSON,
    ),
)

# Extend the asset with the projection extension.
# proj:shape is [height, width] only, so the time axis must not be included.
# Assumes latitude/longitude are the 1-D coordinate axes of the grid -- TODO confirm.
n_rows = int(ds_all.latitude.size)
n_cols = int(ds_all.longitude.size)
asset_ext = AssetProjectionExtension.ext(item.assets["jma"])
asset_ext.epsg = 4326
asset_ext.shape = [n_rows, n_cols]
asset_ext.bbox = bbox
# NOTE(review): from_bounds() treats the bbox as outer pixel edges; if the
# coordinates are cell centres the transform is off by half a cell -- confirm.
asset_ext.transform = list(
    rasterio.transform.from_bounds(west, south, east, north, n_cols, n_rows)
)

# To extend an existing catalog instead, fetch its collection with
# list(catalog.get_collections())[0] rather than creating a new one.

# Create the collection, add the item, then add to catalog
spatial_extent = pystac.SpatialExtent(bboxes=[bbox])
temporal_extent = pystac.TemporalExtent([[start_time, end_time]])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)
collection = pystac.Collection(
    id="weather-data",
    description="Historic rainfall and meteorological data from various sources (JMA, JWA, etc)",
    extent=collection_extent,
    license='test',
)

# Collection.add_item() sets the item's "collection" link itself, so no
# hand-built link is needed.
collection.add_item(item)
catalog.add_child(collection)

In [None]:
# catalog.make_all_asset_hrefs_relative()
catalog.normalize_hrefs("gs://sam-temp-dev/stac/one-concern-stac-catalog")
catalog.get_self_href()

In [None]:
catalog

### Write the catalog directly to GCS

In [None]:
%%time
import gcsfs
fs = gcsfs.GCSFileSystem()  # project='onec-dev'
indent = 2

# (destination, STAC object) pairs, written in catalog -> collection -> item order.
stac_outputs = [
    ("gs://sam-temp-dev/stac/one-concern-stac-catalog/catalog.json", catalog),
    ("gs://sam-temp-dev/stac/one-concern-stac-catalog/weather-data/collection.json", collection),
    ("gs://sam-temp-dev/stac/one-concern-stac-catalog/weather-data/jwa-historical-rainfall/jwa-historical-rainfall.json", item),
]

# Serialize each object to indented JSON and upload it with a JSON content type.
for destination, stac_object in stac_outputs:
    with fs.open(destination, "w", content_type="application/json") as f:
        f.write(json.dumps(stac_object.to_dict(), indent=indent))

### To read the catalog from GCS, we need to replace pystac's default StacIO with a subclass that uses fsspec
(taken from stactools: https://stactools.readthedocs.io/en/latest/_modules/stactools/core/io.html#FsspecStacIO)

In [None]:
from pystac.stac_io import DefaultStacIO, StacIO

class FsspecStacIO(DefaultStacIO):
    """A subclass of :py:class:`pystac.DefaultStacIO` that uses `fsspec
    <https://filesystem-spec.readthedocs.io/en/latest/>`_ for reads and writes.

    Both hooks are overridden so that any href fsspec understands (e.g.
    ``gs://``) can be read from and written to.
    """

    def read_text_from_href(self, href: str) -> str:
        """Reads a file as a utf-8 string using `fsspec
        <https://filesystem-spec.readthedocs.io/en/latest/>`_

        Args:
            href (str): The href to read.

        Returns:
            str: The read text, decoded as utf-8 if necessary.

        Raises:
            ValueError: If the data read is neither ``str`` nor ``bytes``.
        """
        with fsspec.open(href, "r") as f:
            s = f.read()
        if isinstance(s, str):
            return s
        if isinstance(s, bytes):
            return str(s, encoding="utf-8")
        raise ValueError(f"Unable to decode data loaded from HREF: {href}")

    def write_text_to_href(self, href: str, txt: str) -> None:
        """Writes text to a file using `fsspec
        <https://filesystem-spec.readthedocs.io/en/latest/>`_

        Without this override, writes would go through the inherited
        ``DefaultStacIO`` implementation instead of fsspec.

        Args:
            href (str): The href to write to.
            txt (str): The text to write.
        """
        with fsspec.open(href, "w") as destination:
            destination.write(txt)
                
StacIO.set_default(FsspecStacIO)

In [None]:
pystac.Catalog.from_file("gs://sam-temp-dev/stac/one-concern-stac-catalog/catalog.json")  #, stac_io=FsspecStacIO)

### Other testing is below here

In [None]:
with fsspec.open("gs://sam-temp-dev/stac/one-concern-stac-catalog/catalog.json") as file: 
    data = json.load(file)
    
cat_gcs = pystac.Catalog.from_dict(data) # , root="https://storage.cloud.google.com/sam-temp-dev/stac/one-concern-stac-catalog/catalog.json")
cat_gcs

In [None]:
collections = list(catalog.get_collections())

In [None]:
for collection in collections:
    print(collection)

In [None]:
collection = catalog.get_child("weather-data")
collection

In [None]:
def _validation_report(stac_object):
    """Validate ``stac_object`` and return a one-line, human-readable result.

    ``validate()`` signals failure by raising ``STACValidationError``; its
    return value is not a list of errors, so printing it as "errors" is
    misleading.
    """
    try:
        stac_object.validate()
    except pystac.STACValidationError as err:
        return f"INVALID - {err}"
    return "valid"


# Walk the catalog tree and report the validation status of every node.
for root, subcategories, items in catalog.walk():

    print(f'{root.id} is the current root in the STAC')
    print(f'STAC validation result: {_validation_report(root)}')

    for subcat in subcategories:
        print(f'{subcat.id} is a subcatalog or collection for the current root in the STAC')
        print(f'STAC validation result: {_validation_report(subcat)}')

    for item in items:
        print(f'{item.id} is a STAC Item within the current root')
        print(f'STAC validation result: {_validation_report(item)}')

### GEE STAC Catalog

In [None]:
gee_cat_path = "https://earthengine-stac.storage.googleapis.com/catalog/catalog.json"

In [None]:
sen2_level2a = "https://storage.googleapis.com/earthengine-stac/catalog/COPERNICUS/COPERNICUS_S2_CLOUD_PROBABILITY.json"

In [None]:
# urllib.request must be imported explicitly: this cell runs before the
# notebook's later `import urllib`, and a bare `import urllib` does not
# guarantee that the `request` submodule is loaded.
import urllib.request

# Other entry points explored (previously assigned and immediately overwritten):
#   "https://earthengine-stac.storage.googleapis.com/catalog/catalog.json"
#   "https://storage.googleapis.com/earthengine-stac/catalog/COPERNICUS/catalog.json"
stac_url = 'https://storage.googleapis.com/earthengine-stac/catalog/COPERNICUS/COPERNICUS_S1_GRD.json'

# Download the STAC JSON document and parse it into a dict.
with urllib.request.urlopen(stac_url) as url:
    data = json.loads(url.read().decode())


In [None]:
data

In [None]:
# Find the href of the link titled "COPERNICUS_S1_GRD".
# Reset first so a value left in `href` by an earlier cell is never mistaken
# for a match when no link has that title.
href = None
for link in data["links"]:
    print(link)
    # Not every link carries a "title"; .get() replaces the bare except that
    # previously hid every error raised inside the loop.
    if link.get("title") == "COPERNICUS_S1_GRD":
        href = link["href"]
        break

In [None]:
href

In [None]:
# pystac_client is only imported further down the notebook; import it here so
# this cell also works when the notebook is run top to bottom.
import pystac_client

stac_url = "https://earth-search.aws.element84.com/v0"
catalog = pystac_client.Client.open(stac_url)
catalog

In [None]:
collections = list(catalog.get_collections())
for collection in collections:
    print(collection)

In [None]:
collection = catalog.get_child("sentinel-s2-l1c")
collection

In [None]:
bbox = [139.4038110856679964, 38.1097418227999967, 139.4887555307919911, 38.1482418231080018]
datetime = "2021-08-01/2022-09-10"

# bbox = [xmin, ymin, xmax, ymax]
# catalog = pystac_client.Client.open(
#     "https://planetarycomputer.microsoft.com/api/stac/v1"
# )
search = catalog.search(
    collections=["sentinel-s2-l1c"], bbox=bbox, datetime=datetime)

In [None]:
items = search.item_collection()
print(f"Found {len(items)} items")

## PlanetaryComputer Interactions

In [None]:
import os
import urllib

import requests
import pystac
import pystac_client
from pystac.extensions.eo import EOExtension as eo
import planetary_computer
import rasterio
import stackstac
import hvplot.xarray

# import dask.diagnostics
# with dask.diagnostics.ProgressBar():
#     data = aoi.compute()

In [None]:
# IF NEEDED, generate a token

# BASE_TOKEN_URL = "https://planetarycomputer.microsoft.com/api/sas/v1/token"

# collection = "sentinel-2-l2a"

# url = os.path.join(BASE_TOKEN_URL, collection)

# headers = {'accept': 'application/json'}
# response = requests.get(url, headers=headers)
# response.json()

In [None]:
%%time
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)
collections = list(catalog.get_collections())
collections

In [None]:
# # Murakami
# bbox = [139.4038110856679964, 38.1097418227999967, 139.4887555307919911, 38.1482418231080018]
# datetime = "2022-08-01/2022-08-04"

bbox = [-82.58766624, 25.56165135, -80.69396004, 27.16010966]  # SW Florida

# bbox = [xmin, ymin, xmax, ymax]
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1"
)
search = catalog.search(
    collections=["3dep-seamless"], bbox=bbox   #  datetime=datetime
)
items = search.item_collection()
print(f"Found {len(items)} items")

In [None]:
for item in items:
    print(item)

In [None]:
# SECURITY: never commit the subscription key to the notebook. The key that
# was previously hard-coded here should be treated as leaked and rotated.
# `%env NAME="value"  # comment` also stores the quotes and the trailing
# comment as part of the value, so the old line could not have set a usable key.
import os
from getpass import getpass

pc_subscription_key = os.environ.get("PC_SDK_SUBSCRIPTION_KEY") or getpass(
    "Planetary Computer subscription key: "
)
# Register the key with the SDK explicitly rather than relying on the
# environment variable being read at the right moment.
planetary_computer.set_subscription_key(pc_subscription_key)

In [None]:
item_signed = planetary_computer.sign(item)

In [None]:
ds = stackstac.stack(item_signed)

In [None]:
vv = ds.sel(band="vh").compute()

In [None]:
vv

In [None]:
ds.sel(band="vh").hvplot.quadmesh(x='x',
                                   y='y',
                                   # title='dis',
                                   # geo=True,
                                   width=750,
                                   height=700,
                                   rasterize=True,
                                   project=True,
                                   cmap="bmw",
                                   # clim=(50, 100),
                                   tiles='EsriImagery')

In [None]:
collection = catalog.get_child("sentinel-1-rtc")  # Need token to access!

In [None]:
BASE_TOKEN_URL = "https://planetarycomputer.microsoft.com/api/sas/v1/token"

collection = "sentinel-1-rtc"

# SECURITY: read the subscription key from the environment instead of
# hard-coding it. The key previously committed here should be rotated.
# The variable must hold the bare key (no quotes, no trailing comment).
key = os.environ["PC_SDK_SUBSCRIPTION_KEY"]

# URL segments are always joined with "/"; os.path.join would use "\" on Windows.
url = f"{BASE_TOKEN_URL}/{collection}"

headers = {'accept': 'application/json'}
# Pass the key via `params` so requests handles the query-string encoding.
response = requests.get(
    url,
    headers=headers,
    params={"subscription-key": key},
    timeout=30,
)
# Fail with the HTTP error rather than a KeyError on a missing "token" field.
response.raise_for_status()
token = response.json()["token"]
token

In [None]:
sen1_rtc_url= "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-rtc"
sen1_rtc_url_token = sen1_rtc_url + "?" + token
sen1_rtc_url_token

In [None]:
response = requests.get(sen1_rtc_url)
collection_json = response.json()
collection_json

In [None]:
# The Collection constructor expects id/description/extent arguments; a parsed
# STAC JSON document must be loaded with from_dict() instead.
pystac.Collection.from_dict(collection_json)

In [None]:
search = pystac_client.ItemSearch(
    url=sen1_rtc_url_token,
    bbox=bbox_of_interest,
    datetime=time_of_interest,
)

# # Check how many items were returned
# items = search.item_collection()
# print(f"Returned {len(items)} Items")

In [None]:
for item in search.items():
    print(item.id)
    break

In [None]:
# Murakami City, JP
bbox_of_interest = [139.4038110856679964, 38.1097418227999967, 139.4887555307919911, 38.1482418231080018]

time_of_interest = "2022-08-01/2022-08-06"

In [None]:
search = catalog.search(
    collections=["sentinel-1-rtc"],
    bbox=bbox_of_interest,
    datetime=time_of_interest,
)

# Check how many items were returned
items = search.item_collection()
print(f"Returned {len(items)} Items")

In [None]:
search = catalog.search(
    collections=["sentinel-2-l2a"],
    # intersects=area_of_interest,
    bbox=bbox_of_interest,
    datetime=time_of_interest,
    query={"eo:cloud_cover": {"lt": 90}},
)

# Check how many items were returned
items = search.item_collection()
print(f"Returned {len(items)} Items")

In [None]:
item = items[0]
item

In [None]:
blue_href = item.assets["B02"].href
green_href = item.assets["B03"].href
swir_href = item.assets["B11"].href

In [None]:
item.assets

In [None]:
%%time
with rasterio.open(blue_href) as ds:
    profile = ds.profile.copy()
    arr_blue = ds.read(1)
profile

In [None]:
arr_blue.shape

In [None]:
arr_blue