# Copernicus: query and ingest

https://documentation.dataspace.copernicus.eu/APIs/STAC.html

https://documentation.dataspace.copernicus.eu/APIs/S3.html#example-script-to-download-product-using-boto3

In [1]:
import os
# Set abcli_path_bash to the awesome-bash-cli bash directory under $HOME.
os.environ["abcli_path_bash"] = f"{os.getenv('HOME')}/git/awesome-bash-cli/bash"

In [2]:
import boto3
from pystac_client import Client
from abcli import fullname
from abcli import file, path, string
from abcli.modules import objects
from abcli.plugins.metadata import get_from_object
from blue_geo import env
from blue_geo import NAME, VERSION
from blue_geo.catalog.copernicus.sentinel_2.classes import CopernicusSentinel2Datacube
from blue_geo.logger import logger

# Log the blue_geo name/version, abcli's fullname() and string.pretty_date()
# so the recorded output shows which versions produced it.
version_banner = f"{NAME}-{VERSION}.{fullname()}, built on {string.pretty_date()}"
logger.info(version_banner)

🌐  blue_geo-4.241.1.abcli-9.244.1, built on 18 August 2024, 14:04:10


In [3]:
# Root of the Copernicus Data Space STAC catalogue; `client` is what the
# ingest section later calls search() on.
URL = "https://catalogue.dataspace.copernicus.eu/stac"
client = Client.open(url=URL)

---

## query

In [4]:
# Timestamped name for this query's object, plus its path as resolved by
# objects.object_path(); the path is the download root used in the last cell.
object_name = "query-{}".format(string.timestamp())
object_path = objects.object_path(object_name)

logger.info("📂 {}".format(object_name))

🌐  📂 query-2024-08-18-14-04-11-78174


In [5]:
# The bounding box is a square of half-width `bbox_margin` around `bbox_center`,
# in the [min_x, min_y, max_x, max_y] order the original literal used.
bbox_center = (-122.78, 51.83)
bbox_margin = 0.1

query_succeeded = CopernicusSentinel2Datacube.query(
    object_name=object_name,
    bbox=[
        bbox_center[0] - bbox_margin,
        bbox_center[1] - bbox_margin,
        bbox_center[0] + bbox_margin,
        bbox_center[1] + bbox_margin,
    ],
    datetime="2024-07-30/2024-08-09",
    limit=10,
)

# Stop the notebook here if query() reports a falsy result.
assert query_succeeded

🌐  🔎 CopernicusSentinel2Datacube.query -> query-2024-08-18-14-04-11-78174
🌐  🔎 collections: ['SENTINEL-2']
🌐  🔎 bbox: [-122.88, 51.73, -122.68, 51.93]
🌐  🔎 datetime: 2024-07-30/2024-08-09
🌐  🔎 limit: 10
🌐  16 datacubes(s) found.
🌐  🧊 01: S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014-SAFE
🌐  🧊 02: S2A_MSIL1C_20240731T191911_N0511_R099_T10UEC_20240801T003519-SAFE
🌐  🧊 03: S2A_MSIL2A_20240731T191911_N0511_R099_T10UEC_20240801T023153-SAFE
🌐  🧊 04: S2B_MSIL2A_20240805T191909_N0511_R099_T10UEC_20240805T234014-SAFE
🌐  🧊 05: S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519-SAFE
🌐  🧊 06: S2B_MSIL2A_20240802T190919_N0511_R056_T10UEC_20240802T231522-SAFE
🌐  🧊 07: S2B_MSIL1C_20240802T190919_N0511_R056_T10UEC_20240802T224842-SAFE
🌐  🧊 08: S2A_MSIL1C_20240807T190911_N0511_R056_T10UEC_20240808T002811-SAFE
🌐  🧊 09: S2A_MSIL2A_20240807T190911_N0511_R056_T10UEC_20240808T020449-SAFE
🌐  🧊 10: S2B_MSIL1C_20240805T191909_N0511_R099_T10UEC_20240805T230124-SAFE
🌐  🧊 11: S2B_MSIL2A_2

In [6]:
# Read the "datacube_id" entry stored with the query object and keep the
# first id; the ingest section works on that single datacube.
datacube_ids = get_from_object(object_name, "datacube_id")
datacube_id = datacube_ids[0]

logger.info(f"🧊 {datacube_id}")

🌐  🧊 S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014-SAFE


## ingest

In [7]:
# The query result carries ids ending in "-SAFE", while the STAC search below
# is issued with the ".SAFE" form of the same id.
stac_item_id = datacube_id.replace("-SAFE", ".SAFE")
search_parameters = {"ids": [stac_item_id]}

# Echo every search parameter before running the search.
for name, value in search_parameters.items():
    logger.info(f"{name}: {value}")

🌐  ids: ['S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014.SAFE']


In [8]:
# Run the STAC search and materialise the matching items as a list.
search = client.search(**search_parameters)
items = list(search.item_collection())

logger.info(f"{len(items)} item(s) found.")

# One line per match: id, acquisition datetime and the names of its assets.
for found in items:
    asset_names = ", ".join(found.assets.keys())
    logger.info("🧊 {}: {} @ {}".format(found.id, found.datetime, asset_names))

# The following cells work on the first match only.
item = items[0]

🌐  1 item(s) found.
🌐  🧊 S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014.SAFE: 2024-08-05 19:19:09.024000+00:00 @ QUICKLOOK, PRODUCT


In [9]:
# The PRODUCT asset's extra fields hold an "alternate" -> "s3" entry whose
# href looks like "/<bucket>/<key prefix>" (see the logged value).
s3_alternate = item.assets["PRODUCT"].extra_fields.get("alternate")["s3"]
href = s3_alternate["href"]

# Splitting on the first two slashes gives "", the bucket name and the key
# prefix; the leading empty piece is dropped.
href_parts = href.split("/", 2)
bucket_name, s3_prefix = href_parts[1:]

logger.info(f"href: {href}")

logger.info(f"bucket_name: {bucket_name}")
logger.info(f"s3_prefix: {s3_prefix}")

🌐  href: /eodata/Sentinel-2/MSI/L2A/2024/08/05/S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014.SAFE
🌐  bucket_name: eodata
🌐  s3_prefix: Sentinel-2/MSI/L2A/2024/08/05/S2B_MSIL2A_20240805T191909_N0511_R099_T10UDC_20240805T234014.SAFE


In [10]:
# S3 resource bound to the Copernicus Data Space endpoint; the access key
# pair is read from blue_geo's env module.
s3 = boto3.resource(
    service_name="s3",
    region_name="default",
    endpoint_url="https://eodata.dataspace.copernicus.eu",
    aws_access_key_id=env.COPERNICUS_AWS_ACCESS_KEY_ID,
    aws_secret_access_key=env.COPERNICUS_AWS_SECRET_ACCESS_KEY,
)

# Handle on the bucket named in the product's s3 href.
bucket = s3.Bucket(bucket_name)

In [11]:
# List the product's files relative to the product prefix.
#
# The S3 Prefix filter is a plain string match, so the listing may contain the
# "<s3_prefix>/" marker key itself, or keys that merely share the prefix string
# without being under "<s3_prefix>/". Splitting such keys on "<s3_prefix>/" and
# indexing [1] would raise IndexError (or yield an empty name), so they are
# filtered out here, as the download cell already does for empty suffixes.
key_prefix = f"{s3_prefix}/"
list_of_files = [
    s3_object.key[len(key_prefix):]
    for s3_object in bucket.objects.filter(Prefix=s3_prefix)
    if s3_object.key.startswith(key_prefix) and s3_object.key != key_prefix
]

logger.info(f"{len(list_of_files)} file(s).")
for index, filename in enumerate(list_of_files, start=1):
    logger.info(f"#{index}: {filename}")

🌐  95 file(s).
🌐  #1: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/MTD_DS.xml
🌐  #2: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/QI_DATA/FORMAT_CORRECTNESS.xml
🌐  #3: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/QI_DATA/GENERAL_QUALITY.xml
🌐  #4: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/QI_DATA/GEOMETRIC_QUALITY.xml
🌐  #5: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/QI_DATA/RADIOMETRIC_QUALITY.xml
🌐  #6: DATASTRIP/DS_2BPS_20240805T234014_S20240805T192816/QI_DATA/SENSOR_QUALITY.xml
🌐  #7: GRANULE/L2A_T10UDC_A038735_20240805T192816/AUX_DATA/AUX_CAMSFO
🌐  #8: GRANULE/L2A_T10UDC_A038735_20240805T192816/AUX_DATA/AUX_ECMWFT
🌐  #9: GRANULE/L2A_T10UDC_A038735_20240805T192816/IMG_DATA/R10m/T10UDC_20240805T191909_AOT_10m.jp2
🌐  #10: GRANULE/L2A_T10UDC_A038735_20240805T192816/IMG_DATA/R10m/T10UDC_20240805T191909_B02_10m.jp2
🌐  #11: GRANULE/L2A_T10UDC_A038735_20240805T192816/IMG_DATA/R10m/T10UDC_20240805T191909_B03_10m.jp2
🌐  #12: GRANULE/L2A_T10UDC_A038735_2024

In [12]:
# Download every file of the product whose name ends with `extension` into
# object_path, mirroring the key layout below the product prefix.
#
# - extension: filename suffix to match (a suffix test, not a strict file
#   extension: "TCI.jp2" matches "..._TCI.jp2" too).
# - dryrun: when True, only log what would be downloaded; nothing is created
#   or written on disk.
list_of_items = bucket.objects.filter(Prefix=s3_prefix)
extension = "TCI.jp2"
dryrun = False

key_prefix = f"{s3_prefix}/"
error_count = 0

# The loop variable is deliberately not named `item`: that name holds the STAC
# item selected after the search and read again when the s3 href is extracted,
# so clobbering it would break re-running those cells.
for s3_object in list_of_items:
    # The Prefix filter is a plain string match; ignore keys that are not
    # actually under "<s3_prefix>/" instead of failing on them.
    if not s3_object.key.startswith(key_prefix):
        continue

    item_suffix = s3_object.key[len(key_prefix):]
    # Skip the product marker key and "directory" keys. S3 keys always use
    # "/" as the separator, independent of the local os.sep.
    if not item_suffix or item_suffix.endswith("/"):
        continue

    item_filename = os.path.join(object_path, item_suffix)
    if not item_filename.endswith(extension):
        continue

    logger.info("{} -> {}".format(s3_object.key, item_filename))
    if dryrun:
        continue

    # Create the destination directory only for files that are really
    # downloaded, so skipped files and dry runs leave no empty directories.
    if not path.create(file.path(item_filename)):
        error_count += 1
        continue

    try:
        bucket.download_file(s3_object.key, item_filename)
    except Exception as e:
        # Best effort: one failed download is logged and counted, and the
        # remaining files are still attempted.
        logger.error(e)
        error_count += 1

if error_count:
    logger.error(f"{error_count} error(s).")

---

In [13]:
# END