In [2]:
import os
import json
from intake import open_catalog

In [3]:
# global arguments: GitHub location the generated STAC files are served from

USER = "charlesbluca"
REPO = "pangeo-datastore-stac"
BRANCH = "master"
STAC_VERSION = "1.0.0-beta.2"

# raw-content base URL of the published branch
url = "/".join(["https://raw.githubusercontent.com", USER, REPO, BRANCH])

# "root" link that the later cells place first in every generated "links" list;
# the "master" path segment here is the output directory the later cells write
# into (os.makedirs("master", ...)), in addition to BRANCH already inside `url`
root = dict(
    href=url + "/master/catalog.json",
    type="application/json",
    rel="root",
)

In [4]:
# build the top-level STAC catalog from the intake master catalog and write it
# to master/catalog.json

cat = open_catalog(
    "https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/master.yaml"
)

# base catalog: the intake description is used as the STAC title, the STAC
# description itself is left empty; links start with root followed by self
d = {
    "stac_version": STAC_VERSION,
    "id": cat.name,
    "title": cat.description,
    "description": "",
    "links": [
        root,
        {
            "href": f"{url}/master/catalog.json",
            "type": "application/json",
            "rel": "self",
        },
    ],
}

# one "child" link per entry of the master catalog, each pointing at that
# entry's own catalog.json
d["links"].extend(
    {
        "href": f"{url}/master/{name}/catalog.json",
        "type": "application/json",
        "rel": "child",
        "title": cat[name].description,
    }
    for name in list(cat)
)

# write to file (the output directory is created on first run)
os.makedirs("master", exist_ok=True)
with open("master/catalog.json", "w") as f:
    json.dump(d, f, indent=2)

In [5]:
# generate catalog/collections for all other entries
#
# Every entry returned by cat.walk() becomes one JSON file under master/:
#   * entries whose driver is a YAML file catalog -> <path>/catalog.json
#   * every other entry                           -> <path>/collection.json
# where <path> is the entry's dotted walk key with "." replaced by "/".

CATALOG_DRIVER = "intake.catalog.local.YAMLFileCatalog"
GCS_SCHEME = "gs://"


def _json_link(href, rel, **extra):
    """Return a link dict with keys ordered href, type, rel, then any *extra* keys."""
    return {"href": href, "type": "application/json", "rel": rel, **extra}


def _catalog_href(parts):
    """Return the catalog.json URL for the catalog located at path *parts*.

    An empty *parts* yields the top-level ``{url}/master/catalog.json``, so the
    parent of a top-level entry resolves to the root catalog instead of a URL
    containing an empty path segment.
    """
    return "/".join([f"{url}/master", *parts, "catalog.json"])


def _gcs_https_href(urlpath, suffix):
    """Return the storage.googleapis.com HTTPS URL for *urlpath* plus *suffix*.

    Only a literal leading ``gs://`` is removed. ``str.lstrip("gs://")`` must not
    be used for this: it strips any leading run of the characters ``g``, ``s``,
    ``:`` and ``/`` and therefore also eats the start of bucket names such as
    ``gcs-data`` or ``sat-imagery``.
    """
    if urlpath.startswith(GCS_SCHEME):
        urlpath = urlpath[len(GCS_SCHEME):]
    return "https://storage.googleapis.com/" + urlpath + suffix


entries = cat.walk(depth=5)

for item in entries:
    entry = entries[item]
    parts = item.split(".")          # walk keys are dot-separated paths
    item_str = "/".join(parts)       # same path, as a directory / URL fragment
    parent_link = _json_link(_catalog_href(parts[:-1]), "parent")

    if entry._driver == CATALOG_DRIVER:

        # populate base catalog with root, parent and self links
        d = {"stac_version": STAC_VERSION,
             "id": entry.name,
             "title": entry.description,
             "description": "",
             "links": [root,
                       parent_link,
                       _json_link(f"{url}/master/{item_str}/catalog.json", "self")]}

        # add child links; nested catalogs point at catalog.json, everything
        # else at collection.json (names are snapshotted with list() before
        # indexing back into the catalog)
        for child in list(entry):
            child_entry = entry[child]
            if child_entry._driver == CATALOG_DRIVER:
                child_file = "catalog.json"
            else:
                child_file = "collection.json"
            d["links"].append(
                _json_link(f"{url}/master/{item_str}/{child}/{child_file}",
                           "child",
                           title=child_entry.description))

        out_file = "catalog.json"
    else:

        # populate base collection
        # NOTE(review): providers, license and the license link below are
        # hard-coded CMIP6 values applied to every non-catalog entry, and the
        # first provider name carries a leading space - confirm both are intended.
        d = {"stac_version": STAC_VERSION,
             "stac_extensions": ["collection-assets"],
             "id": entry.name,
             "title": entry.description,
             "description": "",
             "keywords": [],
             # placeholder extents: no spatial/temporal information is filled in
             "extent": {"spatial": {"bbox": [[]]}, "temporal": {"interval": [[]]}},
             "providers": [
                 {"name": " World Climate Research Programme",
                  "roles": ["producer", "licensor"],
                  "url": "https://www.wcrp-climate.org/wgcm-cmip/wgcm-cmip6"},
                 {"name": "Pangeo",
                  "roles": ["processor"],
                  "url": "https://pangeo.io/"},
                 {"name": "Google",
                  "roles": ["host"],
                  "url": "https://cloud.google.com/storage"}],
             "license": "proprietary",
             "links": [root,
                       parent_link,
                       _json_link(f"{url}/master/{item_str}/collection.json", "self"),
                       # license link (text/html, so not built via _json_link)
                       {"href": "https://pcmdi.llnl.gov/CMIP6/TermsOfUse/TermsOfUse6-1.html",
                        "type": "text/html",
                        "rel": "license",
                        "title": "CMIP6: Terms of Use"}],
             "assets": {}}

        # add zarr assets: expose the store's consolidated metadata over HTTPS
        if entry._driver == "zarr":
            d["assets"]["zmetadata"] = {
                "href": _gcs_https_href(entry.urlpath, "/.zmetadata"),
                "description": "Consolidated metadata file for Zarr store",
                "type": "application/json",
                "roles": ["metadata", "zarr-consolidated-metadata"]}

        out_file = "collection.json"

    # write to file
    os.makedirs(f"master/{item_str}", exist_ok=True)
    with open(f"master/{item_str}/{out_file}", "w") as f:
        json.dump(d, f, indent=2)

  s = self.get(**kwargs)
