In [20]:
from pathlib import Path
import json
import pandas as pd

In [74]:
datasets_path = Path(".")

# Sorted so that `configs` (and every list/DataFrame built from it) comes out
# in the same order on every run; rglob's iteration order is not guaranteed.
config_paths = sorted(datasets_path.rglob("*/collection/config.json"))


def load_collection_configs(paths):
    """Load each config.json in `paths` and tag it with its collection id.

    Parameters
    ----------
    paths : iterable of pathlib.Path
        Paths shaped like ``<collection_id>/collection/config.json``.

    Returns
    -------
    list of dict
        One parsed config per file that loaded successfully, in the order of
        `paths`. Each dict gets an extra ``'collection_id'`` key holding the
        name of the directory two levels above the file. Files that fail to
        load are reported with ``print`` and skipped.
    """
    loaded = []

    for path in paths:
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(path, "r", encoding="utf-8") as f:
                config_data = json.load(f)

            # <collection_id>/collection/config.json -> <collection_id>
            config_data['collection_id'] = path.parent.parent.name
        except Exception as e:
            # Best effort: report the problem and keep going with the other files.
            print(f"Error loading {path}: {e}")
            continue

        loaded.append(config_data)

    return loaded


configs = load_collection_configs(config_paths)

print(f"Loaded {len(configs)} config.json files.")

Loaded 24 config.json files.


Every collection's `config.json` has a `mosaic_info` and a `render_config` entry.

In [8]:
# Top-level keys of each loaded config, then every key seen in any of them.
all_key_sets = [set(cfg.keys()) for cfg in configs]

all_keys = set()
all_keys.update(*all_key_sets)

print(all_keys)

{'mosaic_info', 'render_config'}


In [11]:
# Keys found inside each config's 'mosaic_info' dict; a config without
# 'mosaic_info' contributes an empty set.
all_mosaic_info_key_sets = [
    set(cfg.get('mosaic_info', {}).keys())
    for cfg in configs
]

# Every 'mosaic_info' key seen in any config.
all_mosaic_info_keys = set()
all_mosaic_info_keys.update(*all_mosaic_info_key_sets)

print(all_mosaic_info_keys)

{'default_location', 'mosaics', 'animation_hint', 'render_options', 'default_custom_query'}


In [18]:
# Key set of every individual mosaic entry, across all configs. Configs with
# no 'mosaic_info' or no 'mosaics' list contribute nothing.
all_mosaic_key_sets = [
    set(entry.keys())
    for cfg in configs
    for entry in cfg.get('mosaic_info', {}).get('mosaics', [])
]

# Every key that appears on at least one mosaic entry.
mosaic_key_union = set()
mosaic_key_union.update(*all_mosaic_key_sets)

print(mosaic_key_union)

{'name', 'description', 'cql'}


In [76]:
# Flatten to one record per mosaic entry, carrying along the id of the
# collection whose config it came from.
all_mosaics = [
    {**entry, "collection_id": cfg['collection_id']}
    for cfg in configs
    for entry in cfg.get('mosaic_info', {}).get('mosaics', [])
]

# One row per mosaic; columns are the mosaic keys plus 'collection_id'.
all_mosaics_df = pd.DataFrame(data=all_mosaics)

Commonly, a mosaic includes a CQL search based on one of the properties `datetime`, `eo:cloud_cover`, `id`, `sar:polarizations`, or `usda_cdl:type`.

In [None]:
# NOTE(review): Counter is not used in this cell; kept in case a later cell
# relies on it. Consider moving it to the imports cell at the top.
from collections import Counter

# Distinct first arguments of the CQL clauses, compared by their string form.
# 'cql' holds a list of clauses per mosaic, so explode() yields one clause per
# row and dropna() removes mosaics that have no 'cql' value.
{
    str(clause.get('args')[0])
    for clause in all_mosaics_df['cql'].explode().dropna()
}

{"{'property': 'datetime'}",
 "{'property': 'eo:cloud_cover'}",
 "{'property': 'id'}",
 "{'property': 'sar:polarizations'}",
 "{'property': 'usda_cdl:type'}"}

In [84]:
# Full record of every mosaic whose collection id does not contain 'sentinel'
# and whose CQL, rendered as a string, contains 'datetime'.
[
    row.to_dict()
    for _idx, row in all_mosaics_df.iterrows()
    if 'sentinel' not in row['collection_id'] and 'datetime' in str(row['cql'])
]

[{'name': '2020',
  'description': '2020 Biodiversity Intactness',
  'cql': [{'op': 'anyinteracts',
    'args': [{'property': 'datetime'},
     {'interval': ['2020-01-01T00:00:00Z', '2020-12-31T23:59:59Z']}]}],
  'collection_id': 'io-biodiversity'},
 {'name': '2019',
  'description': '2019 Biodiversity Intactness',
  'cql': [{'op': 'anyinteracts',
    'args': [{'property': 'datetime'},
     {'interval': ['2019-01-01T00:00:00Z', '2019-12-31T23:59:59Z']}]}],
  'collection_id': 'io-biodiversity'},
 {'name': '2018',
  'description': '2018 Biodiversity Intactness',
  'cql': [{'op': 'anyinteracts',
    'args': [{'property': 'datetime'},
     {'interval': ['2018-01-01T00:00:00Z', '2018-12-31T23:59:59Z']}]}],
  'collection_id': 'io-biodiversity'},
 {'name': '2017',
  'description': '2017 Biodiversity Intactness',
  'cql': [{'op': 'anyinteracts',
    'args': [{'property': 'datetime'},
     {'interval': ['2017-01-01T00:00:00Z', '2017-12-31T23:59:59Z']}]}],
  'collection_id': 'io-biodiversity'},


In [None]:
# NOTE(review): this repeats the non-sentinel / 'datetime' query from the
# earlier cell; consider deleting one of the two.
# Full record of every mosaic whose collection id does not contain 'sentinel'
# and whose CQL, rendered as a string, contains 'datetime'.
[
    row.to_dict()
    for _idx, row in all_mosaics_df.iterrows()
    if 'sentinel' not in row['collection_id'] and 'datetime' in str(row['cql'])
]