# Browseable STAC catalog

This notebook builds a browseable STAC catalog for custom Fufiters outputs.

In [1]:
# STAC Items already in S3
import pystac
import s3fs
import utils


In [2]:
# Notebook-wide S3 filesystem handle; later cells reuse `s3` for listing and globbing.
s3 = s3fs.S3FileSystem() # reads local credentials
# List the top-level entries of the `fufiters` bucket.
s3.ls('s3://fufiters/')

['fufiters/012_023790_IW1',
 'fufiters/121_258661_IW2',
 'fufiters/20230621_20230703',
 'fufiters/catalog.json',
 'fufiters/collection.json',
 'fufiters/quinn',
 'fufiters/scott']

In [3]:
# Full S3 path of a single STAC Item JSON, read further below as a one-item check.
jsonPath = 's3://fufiters/121_258661_IW2/20230617_20230629/S1_258661_IW2_20230617_20230629_VV_INT20_EA08/S1_258661_IW2_20230617_20230629_VV_INT20_EA08.json'
# Listing the exact key confirms the object exists before trying to read it.
s3.ls(jsonPath)

['fufiters/121_258661_IW2/20230617_20230629/S1_258661_IW2_20230617_20230629_VV_INT20_EA08/S1_258661_IW2_20230617_20230629_VV_INT20_EA08.json']

In [4]:
from pystac.stac_io import DefaultStacIO, StacIO
from urllib.parse import urlparse

class CustomStacIO(DefaultStacIO):
   """StacIO implementation that reads ``s3://`` HREFs through s3fs.

   Sources with any other scheme (local paths, http(s), ...) are delegated
   unchanged to ``DefaultStacIO``.
   """

   def __init__(self, *args, **kwargs):
      """Initialise the base class, then create this instance's S3 handle.

      Positional and keyword arguments are forwarded to ``DefaultStacIO``.
      """
      # The base initialiser must run so the non-S3 fallback in read_text
      # has whatever state DefaultStacIO expects to find on the instance.
      super().__init__(*args, **kwargs)
      # Instance-owned handle: read_text must not depend on a notebook global.
      self.s3 = s3fs.S3FileSystem()

   def read_text(self, source, *args, **kwargs):
      """Return the content at ``source`` as a string.

      Parameters
      ----------
      source
         HREF to read. If its URL scheme is ``s3`` the object is fetched
         with this instance's s3fs filesystem and decoded as UTF-8.
      *args, **kwargs
         Forwarded to ``DefaultStacIO.read_text`` for non-S3 sources.

      Returns
      -------
      str
         The text content of ``source``.
      """
      if urlparse(source).scheme == "s3":
         # s3fs accepts the full s3:// URL, so no bucket/key split is needed.
         with self.s3.open(source, 'rb') as f:
            return f.read().decode("utf-8")
      return super().read_text(source, *args, **kwargs)

#    def write_text(
#       self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
#    ) -> None:
#       parsed = urlparse(dest)
#       if parsed.scheme == "s3":
#          bucket = parsed.netloc
#          key = parsed.path[1:]
#          self.s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
#       else:
#          super().write_text(dest, txt, *args, **kwargs)

# Register CustomStacIO as the default StacIO so that pystac reads
# (e.g. pystac.read_file) can resolve s3:// HREFs.
StacIO.set_default(CustomStacIO)

In [5]:
# Read one STAC object directly from its s3:// path; this relies on the
# S3-aware StacIO having been registered as the default beforehand.
item = pystac.read_file(jsonPath)

In [6]:
# Find every Item JSON in the bucket: any key under fufiters/ (at any depth)
# whose file name contains 'INT20' and ends in '.json'.
itemJSONs = s3.glob('fufiters/**/*INT20*.json')

In [7]:
# Show the matched keys; they come back without the 's3://' prefix.
itemJSONs

['fufiters/121_258661_IW2/20230617_20230629/S1_258661_IW2_20230617_20230629_VV_INT20_EA08/S1_258661_IW2_20230617_20230629_VV_INT20_EA08.json',
 'fufiters/121_258661_IW2/20230617_20230711/S1_258661_IW2_20230617_20230711_VV_INT20_B642/S1_258661_IW2_20230617_20230711_VV_INT20_B642.json',
 'fufiters/121_258661_IW2/20230617_20230816/S1_258661_IW2_20230617_20230816_VV_INT20_3199/S1_258661_IW2_20230617_20230816_VV_INT20_3199.json']

In [8]:
# Read every matched key as a STAC object. The glob results carry no scheme,
# so 's3://' is prepended to route each read through the S3-aware StacIO.
items = [pystac.read_file(f's3://{key}') for key in itemJSONs]
items

[<Item id=S1_258661_IW2_20230617_20230629_VV_INT20_EA08>,
 <Item id=S1_258661_IW2_20230617_20230711_VV_INT20_B642>,
 <Item id=S1_258661_IW2_20230617_20230816_VV_INT20_3199>]

In [9]:
# Download item stacs
#[s3.download(f, './') for f in itemJSONs]

In [10]:
# Identifier used for the root catalog and passed to the collection helper.
CATALOG_ID = 'fufiters'

# Create new
# Root catalog; the description field currently holds the project repository URL.
catalog = pystac.Catalog(id=CATALOG_ID,
                        description='https://github.com/relativeorbit/incubator2024')

# `utils.create_collection` is a project helper not shown in this notebook;
# presumably it returns a pystac Collection -- verify against utils.py.
collection = utils.create_collection(CATALOG_ID)
catalog.add_child(collection)
collection.add_items(items)

# NOTE: subcatalogs based on STAC properties and datetime pieces
# (the template groups items by their `burstId` value, then by `year`).
catalog.generate_subcatalogs(template='${burstId}/${year}')
# Root all HREFs at the current working directory before saving.
catalog.normalize_hrefs('./')
# NOTE(review): this likely validates only the root catalog object; consider
# `catalog.validate_all()` if the collection and items should be checked too -- confirm.
catalog.validate()
# Write the catalog tree to the normalized HREFs as a relative published catalog.
catalog.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED)

In [None]:
# Load existing
# catalog = pystac.read_file('catalog.json')
# collection = list(catalog.get_collections())[0]

# collection.add_items(items)
# # NOTE: subcatalogs based on STAC properties and datetime pieces
# catalog.generate_subcatalogs(template='${burstId}/${year}')
# catalog.normalize_hrefs('./')
# catalog.validate()
# catalog.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED)