# Copernicus: query and ingest

STAC catalogue API reference: https://documentation.dataspace.copernicus.eu/APIs/STAC.html

S3 access reference, including an example script that downloads a product with boto3: https://documentation.dataspace.copernicus.eu/APIs/S3.html#example-script-to-download-product-using-boto3

In [1]:
import os
# Point abcli_path_bash at the awesome-bash-cli checkout under $HOME.
os.environ["abcli_path_bash"] = f"{os.getenv('HOME')}/git/awesome-bash-cli/bash"

In [2]:
import boto3
from pystac_client import Client
from abcli import fullname
from abcli import file, path, string
from abcli.modules import objects
from abcli.plugins.metadata import get_from_object
from blue_geo import env
from blue_geo import NAME, VERSION
from blue_geo.catalog.copernicus.sentinel_2.classes import CopernicusSentinel2Datacube
from blue_geo.logger import logger

# Log the package name/version, the abcli full name, and the current date.
logger.info(
    "{}-{}.{}, built on {}".format(NAME, VERSION, fullname(), string.pretty_date())
)

🌐  blue_geo-4.255.1.abcli-9.245.1, built on 21 August 2024, 19:42:25


In [3]:
# Endpoint of the Copernicus Data Space STAC catalogue.
URL = "https://catalogue.dataspace.copernicus.eu/stac"
# STAC client used by the ingest section below.
client = Client.open(url=URL)

---

## query

In [4]:
# Name the query object after the current timestamp.
object_name = "query-{}".format(string.timestamp())
logger.info("📂 {}".format(object_name))

🌐  📂 query-2024-08-21-19-42-26-25664


In [5]:
# Area of interest: a square of +/- half_width degrees around the centre point.
center_lon, center_lat = -122.78, 51.83
half_width = 0.1

# The query must succeed for the rest of the notebook to make sense.
assert CopernicusSentinel2Datacube.query(
    object_name=object_name,
    bbox=[
        center_lon - half_width,
        center_lat - half_width,
        center_lon + half_width,
        center_lat + half_width,
    ],
    datetime="2024-07-30/2024-08-09",
    count=-1,
)

🌐  🔎 CopernicusSentinel2Datacube.query -> query-2024-08-21-19-42-26-25664
🌐  🔎 collections: ['SENTINEL-2']
🌐  🔎 bbox: [-122.88, 51.73, -122.68, 51.93]
🌐  🔎 datetime: 2024-07-30/2024-08-09
🌐  16 datacubes(s) found.
🌐  🧊 01: datacube-copernicus-sentinel_2-S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519-SAFE
🌐  🧊 02: datacube-copernicus-sentinel_2-S2A_MSIL1C_20240731T191911_N0511_R099_T10UEC_20240801T003519-SAFE
🌐  🧊 03: datacube-copernicus-sentinel_2-S2A_MSIL1C_20240807T190911_N0511_R056_T10UDC_20240808T002811-SAFE
🌐  🧊 04: datacube-copernicus-sentinel_2-S2A_MSIL1C_20240807T190911_N0511_R056_T10UEC_20240808T002811-SAFE
🌐  🧊 05: datacube-copernicus-sentinel_2-S2A_MSIL2A_20240731T191911_N0511_R099_T10UDC_20240801T023153-SAFE
🌐  🧊 06: datacube-copernicus-sentinel_2-S2A_MSIL2A_20240731T191911_N0511_R099_T10UEC_20240801T023153-SAFE
🌐  🧊 07: datacube-copernicus-sentinel_2-S2A_MSIL2A_20240807T190911_N0511_R056_T10UDC_20240808T020449-SAFE
🌐  🧊 08: datacube-copernicus-sentinel_2-S2A_

In [6]:
# Pick the first datacube id stored under "datacube_id" in the query object.
datacube_ids = get_from_object(object_name, "datacube_id")
datacube_id = datacube_ids[0]
logger.info("🧊 {}".format(datacube_id))

🌐  🧊 datacube-copernicus-sentinel_2-S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519-SAFE


## ingest

In [7]:
# Turn the datacube id back into the product id:
# swap the "-SAFE" suffix for ".SAFE", then take the fourth "-"-separated field.
product_id = datacube_id.replace("-SAFE", ".SAFE").split("-", 4)[3]
search_parameters = dict(ids=[product_id])

for param, value in search_parameters.items():
    logger.info(f"{param}: {value}")

🌐  ids: ['S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519.SAFE']


In [8]:
# Look up the product in the STAC catalogue.
search = client.search(**search_parameters)

items = [*search.item_collection()]

logger.info("{} item(s) found.".format(len(items)))

# One line per item: id, acquisition time, and the names of its assets.
for item in items:
    asset_names = ", ".join(item.assets)
    logger.info(f"🧊 {item.id}: {item.datetime} @ {asset_names}")

# The following cells work on the first match.
item = items[0]

🌐  1 item(s) found.
🌐  🧊 S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519.SAFE: 2024-07-31 19:19:11.024000+00:00 @ QUICKLOOK, PRODUCT


In [9]:
# S3 location of the product, taken from the PRODUCT asset's "alternate" entry.
product_asset = item.assets["PRODUCT"]
href = product_asset.extra_fields.get("alternate")["s3"]["href"]

# href is split on its first two "/": the piece before the first "/" is
# dropped, the next is the bucket, and the remainder is the key prefix.
bucket_name, s3_prefix = href.split("/", 2)[1:]

logger.info("href: {}".format(href))

for label, value in (("bucket_name", bucket_name), ("s3_prefix", s3_prefix)):
    logger.info(f"{label}: {value}")

🌐  href: /eodata/Sentinel-2/MSI/L1C/2024/07/31/S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519.SAFE
🌐  bucket_name: eodata
🌐  s3_prefix: Sentinel-2/MSI/L1C/2024/07/31/S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519.SAFE


In [10]:
# Connection settings for the Copernicus S3 endpoint; the access keys are
# read from the blue_geo env module.
s3_options = {
    "endpoint_url": "https://eodata.dataspace.copernicus.eu",
    "aws_access_key_id": env.COPERNICUS_AWS_ACCESS_KEY_ID,
    "aws_secret_access_key": env.COPERNICUS_AWS_SECRET_ACCESS_KEY,
    "region_name": "default",
}
s3 = boto3.resource("s3", **s3_options)

bucket = s3.Bucket(bucket_name)

In [11]:
# Keys found under the product prefix, expressed relative to that prefix.
list_of_files = [
    s3_object.key.split(f"{s3_prefix}/", 1)[1]
    for s3_object in bucket.objects.filter(Prefix=s3_prefix)
]

logger.info("{} file(s).".format(len(list_of_files)))
for index, filename in enumerate(list_of_files):
    logger.info("#{}: {}".format(index + 1, filename))

🌐  66 file(s).
🌐  #1: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/MTD_DS.xml
🌐  #2: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/QI_DATA/FORMAT_CORRECTNESS.xml
🌐  #3: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/QI_DATA/GENERAL_QUALITY.xml
🌐  #4: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/QI_DATA/GEOMETRIC_QUALITY.xml
🌐  #5: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/QI_DATA/RADIOMETRIC_QUALITY.xml
🌐  #6: DATASTRIP/DS_2APS_20240801T003519_S20240731T192211/QI_DATA/SENSOR_QUALITY.xml
🌐  #7: GRANULE/L1C_T10UDC_A047572_20240731T192211/AUX_DATA/AUX_CAMSFO
🌐  #8: GRANULE/L1C_T10UDC_A047572_20240731T192211/AUX_DATA/AUX_ECMWFT
🌐  #9: GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC_20240731T191911_B01.jp2
🌐  #10: GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC_20240731T191911_B02.jp2
🌐  #11: GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC_20240731T191911_B03.jp2
🌐  #12: GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC

In [12]:
# Download the product files whose name ends with `extension` into the local
# datacube folder, keeping the relative layout found under `s3_prefix`.
list_of_items = bucket.objects.filter(Prefix=s3_prefix)
extension = "TCI.jp2"
dryrun = False  # True: only log what would be downloaded.

datacube_path = objects.object_path(datacube_id)

# Loop-invariant: the part of every key that precedes the relative path.
key_prefix = f"{s3_prefix}/"

error_count = 0
# The loop variable is deliberately NOT called `item`: that name holds the
# STAC item selected earlier in the notebook, and rebinding it here would
# break re-running the cell that reads `item.assets`.
for s3_object in list_of_items:
    item_suffix = s3_object.key.split(key_prefix, 1)[1]
    if not item_suffix:
        # The key is the prefix itself; there is nothing to download.
        continue

    item_filename = os.path.join(datacube_path, item_suffix)
    # Runs for every key, before the extension filter below.
    # NOTE(review): presumably this creates the parent folder of
    # item_filename -- confirm against abcli's path.create / file.path.
    if not path.create(file.path(item_filename)):
        error_count += 1
        continue
    if item_filename.endswith(os.sep):
        # The key names a folder, not a file.
        continue

    if not item_filename.endswith(extension):
        continue

    logger.info("{} -> {}".format(s3_object.key, item_filename))
    if dryrun:
        continue

    # Best effort: a failed download is logged and counted, and the loop
    # moves on to the next key.
    try:
        bucket.download_file(s3_object.key, item_filename)
    except Exception as e:
        logger.error(e)
        error_count += 1

if error_count:
    logger.error(f"{error_count} error(s).")

🌐  Sentinel-2/MSI/L1C/2024/07/31/S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519.SAFE/GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC_20240731T191911_TCI.jp2 -> /Users/kamangir/storage/abcli/datacube-copernicus-sentinel_2-S2A_MSIL1C_20240731T191911_N0511_R099_T10UDC_20240801T003519-SAFE/GRANULE/L1C_T10UDC_A047572_20240731T192211/IMG_DATA/T10UDC_20240731T191911_TCI.jp2


---

In [13]:
# END