# EOCanvas Batch Processing with C2RCC on SNAP

## Setup

In [1]:
from eocanvas import API, Credentials
from eocanvas.api import Input, Config, ConfigOption
from eocanvas.processes import SnapProcess
from eocanvas.snap.graph import Graph

In [2]:
# Load the stored EOCanvas credentials. Kept under its own name so it is not
# confused with the HDA client, which is bound to `c` further down.
credentials = Credentials.load()

In [3]:
from hda import Client

In [4]:
# HDA client; used below for dataset metadata lookups and product searches.
c = Client()

In [5]:
c.metadata("EO:ESA:DAT:SENTINEL-2")

{'type': 'object',
 'title': 'Querable',
 'properties': {'dataset_id': {'title': 'dataset_id',
   'type': 'string',
   'oneOf': [{'const': 'EO:ESA:DAT:SENTINEL-2',
     'title': 'EO:ESA:DAT:SENTINEL-2',
     'group': None}]},
  'bbox': {'title': 'Bbox',
   'type': 'array',
   'minItems': 4,
   'maxItems': 4,
   'items': [{'type': 'number', 'maximum': 180, 'minimum': -180},
    {'type': 'number', 'maximum': 90, 'minimum': -90},
    {'type': 'number', 'maximum': 180, 'minimum': -180},
    {'type': 'number', 'maximum': 90, 'minimum': -90}]},
  'productIdentifier': {'title': 'Product Identifier',
   'type': 'string',
   'pattern': '^[a-zA-Z0-9]+$'},
  'productType': {'title': 'Product Type',
   'type': 'string',
   'oneOf': [{'const': 'S2MSI1C', 'title': 'S2MSI1C', 'group': None},
    {'const': 'S2MSI2A', 'title': 'S2MSI2A', 'group': None},
    {'const': 'AUX_GNSSRD', 'title': 'AUX_GNSSRD', 'group': None},
    {'const': 'AUX_PROQUA', 'title': 'AUX_PROQUA', 'group': None},
    {'const': 'AU

Default query template.

In [6]:
# Query template shared by all searches. The empty startdate, enddate and
# tileId fields are placeholders that get filled in per tile later on.
q = dict(
    dataset_id="EO:ESA:DAT:SENTINEL-2",
    startdate="",
    enddate="",
    processingLevel="S2MSI1C",
    tileId="",
)

Load AOI polygons.

In [7]:
import json

In [8]:
# AOIs.txt holds JSON despite its extension: read the text, then parse it.
with open('AOIs.txt', 'r') as aoi_file:
    aoi_text = aoi_file.read()
    poly_dict = json.loads(aoi_text)

In [9]:
poly_dict

{'2': {'geoRegion': 'POLYGON ((-5.22444257 49.9901843, -4.88225479 49.9901843, -4.88225479 50.29003327, -5.22444257 50.29003327, -5.22444257 49.9901843))',
  'tileId': '30UUA'},
 '3': {'geoRegion': 'POLYGON ((-4.81917128 50.19269224, -4.59558622 50.19269224, -4.59558622 50.40467681, -4.81917128 50.40467681, -4.81917128 50.19269224))',
  'tileId': '30UUA'},
 '4': {'geoRegion': 'POLYGON ((-4.32949749 50.26538565, -4.02885352 50.26538565, -4.02885352 50.5075424, -4.32949749 50.5075424, -4.32949749 50.26538565))',
  'tileId': '30UVA'},
 '6': {'geoRegion': 'POLYGON ((-4.02836874 50.14143345, -3.59050258 50.14143345, -3.59050258 50.34954708, -4.02836874 50.34954708, -4.02836874 50.14143345))',
  'tileId': '30UVA'},
 '9': {'geoRegion': 'POLYGON ((-3.70217003 50.32171356, -3.4070158 50.32171356, -3.4070158 50.54736093, -3.70217003 50.54736093, -3.70217003 50.32171356))',
  'tileId': '30UVA'},
 '11': {'geoRegion': 'POLYGON ((-4.42178593 50.96017888, -3.80799343 50.96017888, -3.80799343 51.44272

Group the AOI polygons by tile ID.

In [27]:
# Nested mapping: tile id -> {polygon id -> 'geoRegion' polygon string}.
tile_polys = {}
for poly_key, entry in poly_dict.items():
    tile, region = entry['tileId'], entry['geoRegion']
    # setdefault creates the per-tile dict the first time a tile is seen.
    tile_polys.setdefault(tile, {})[poly_key] = region

In [28]:
tile_polys

{'30UUA': {'2': 'POLYGON ((-5.22444257 49.9901843, -4.88225479 49.9901843, -4.88225479 50.29003327, -5.22444257 50.29003327, -5.22444257 49.9901843))',
  '3': 'POLYGON ((-4.81917128 50.19269224, -4.59558622 50.19269224, -4.59558622 50.40467681, -4.81917128 50.40467681, -4.81917128 50.19269224))',
  '20': 'POLYGON ((-5.7897289 49.9734232, -5.3357743 49.9734232, -5.3357743 50.27582106, -5.7897289 50.27582106, -5.7897289 49.9734232))'},
 '30UVA': {'4': 'POLYGON ((-4.32949749 50.26538565, -4.02885352 50.26538565, -4.02885352 50.5075424, -4.32949749 50.5075424, -4.32949749 50.26538565))',
  '6': 'POLYGON ((-4.02836874 50.14143345, -3.59050258 50.14143345, -3.59050258 50.34954708, -4.02836874 50.34954708, -4.02836874 50.14143345))',
  '9': 'POLYGON ((-3.70217003 50.32171356, -3.4070158 50.32171356, -3.4070158 50.54736093, -3.70217003 50.54736093, -3.70217003 50.32171356))'},
 '30UVB': {'11': 'POLYGON ((-4.42178593 50.96017888, -3.80799343 50.96017888, -3.80799343 51.4427289, -4.42178593 51.4

Load query dates for each tile.

In [12]:
import csv

In [13]:
tile_dates = {}

In [14]:
# Fill tile_dates with {tileId: {'startdate': ..., 'enddate': ...}} from the CSV.
# The csv module asks for files to be opened with newline="" so that embedded
# or platform-specific line endings are handled by the reader itself.
with open("query_dates.csv", "r", newline="") as fp:
    reader = csv.reader(fp, delimiter=",")
    # Discard the first row (assumed to be the header); the default keeps an
    # empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines, which cannot be unpacked.
            continue
        tileId, startdate, enddate = row
        tile_dates[tileId] = {'startdate': startdate, 'enddate': enddate}

In [15]:
tile_dates

{'30UUA': {'startdate': '2020-06-23T00:00:00.000Z',
  'enddate': '2020-06-24T00:00:00.000Z'},
 '30UVA': {'startdate': '2020-06-23T00:00:00.000Z',
  'enddate': '2020-06-24T00:00:00.000Z'},
 '30UVB': {'startdate': '2020-06-23T00:00:00.000Z',
  'enddate': '2020-06-24T00:00:00.000Z'},
 '30UWB': {'startdate': '2020-06-25T00:00:00.000Z',
  'enddate': '2020-06-26T00:00:00.000Z'},
 '30UXB': {'startdate': '2020-07-30T00:00:00.000Z',
  'enddate': '2020-07-31T00:00:00.000Z'},
 '30UWC': {'startdate': '2020-06-25T00:00:00.000Z',
  'enddate': '2020-06-26T00:00:00.000Z'},
 '29UPR': {'startdate': '2020-06-01T00:00:00.000Z',
  'enddate': '2020-06-02T00:00:00.000Z'},
 '30UUC': {'startdate': '2020-06-01T00:00:00.000Z',
  'enddate': '2020-06-02T00:00:00.000Z'},
 '30UVC': {'startdate': '2020-06-23T00:00:00.000Z',
  'enddate': '2020-06-24T00:00:00.000Z'},
 '31UCS': {'startdate': '2023-07-07T00:00:00.000Z',
  'enddate': '2023-07-08T00:00:00.000Z'},
 '31UCT': {'startdate': '2023-07-07T00:00:00.000Z',
  'endda

Example query: choose a tile and one of its polygons, and use the query dates for that tile.

In [16]:
# Example selection: one tile, the first polygon stored for it, and the
# query dates for that tile.
tileId = '30UUA'
# tile_polys[tileId] is a dict keyed by polygon-id strings (e.g. '2'), so
# indexing it with 0 raises KeyError; take the first stored polygon instead.
poly = next(iter(tile_polys[tileId].values()))
query_dates = tile_dates[tileId]

In [17]:
# Work on a copy so the shared template `q` keeps its empty placeholders.
q_example = q.copy()

In [18]:
# Fill the tile-specific fields of the example query in a single call.
q_example.update(
    tileId=tileId,
    startdate=query_dates['startdate'],
    enddate=query_dates['enddate'],
)
# and then poly used for $polygon in the XML file

In [19]:
q_example

{'dataset_id': 'EO:ESA:DAT:SENTINEL-2',
 'startdate': '2020-06-23T00:00:00.000Z',
 'enddate': '2020-06-24T00:00:00.000Z',
 'processingLevel': 'S2MSI1C',
 'tileId': '30UUA'}

## Process workflow

In [20]:
# Read the SNAP graph XML as plain text. Its "$polygon" placeholder is
# replaced with an AOI polygon before each graph is built.
with open("s2_c2rcc.xml", "r") as fp:
    graph_file = fp.read()

In [21]:
# Common to all processes: a single Config passed to every SnapProcess below.
# Its key "img1" is the same key given to the Input of each job.
config = Config(key="img1", options=ConfigOption(uncompress=False, sub_path=""))

In [37]:
# Tiles to submit in this run; each id is looked up in tile_dates and tile_polys.
tiles_to_process = ['30UUC']

In [38]:
# Polygon ids to process; polygons of a selected tile not listed here are skipped.
polys_to_process = ['18']

In [24]:
import os

In [39]:
# Submit one SNAP job per selected polygon of every selected tile.
for tileId in tiles_to_process:
    # A tile without dates or polygons cannot be processed; skip it instead of
    # aborting the whole batch with a KeyError.
    if tileId not in tile_dates or tileId not in tile_polys:
        print(f"[WARNING] No query dates or polygons loaded for tileId {tileId}. Skipping.")
        continue

    # Query parameters depend on tile. Build them on top of a copy of the
    # template so `q` itself is never mutated by the batch run.
    query_dates = tile_dates[tileId]
    tile_query = {
        **q,
        'tileId': tileId,
        'startdate': query_dates['startdate'],
        'enddate': query_dates['enddate'],
    }
    r = c.search(tile_query)
    # if done correctly, there should only be one or two results. Choose first.
    if not r:
        print(f"[WARNING] Search for query containing tileId {tileId} failed. Found none.")
        continue
    url = r.get_download_urls()[0]
    inputs = Input(key="img1", url=url)

    for poly_id, poly in tile_polys[tileId].items():
        if poly_id not in polys_to_process:
            continue
        # Inject this AOI into the graph template before building the graph.
        modified_xml = graph_file.replace("$polygon", poly)
        graph = Graph.from_text(modified_xml)
        process = SnapProcess(snap_graph=graph, eo_config=config, eo_input=inputs)
        process.prepare_inputs()
        try:
            process.run(download_dir=f"result-{tileId}")  # save unordered in result-{tileId} directory
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C / kernel interrupt
            # still stops the batch; report the polygon id and the cause
            # rather than dumping the whole WKT string.
            print(f"[ERROR] Failed to process {tileId} poly {poly_id}: {exc!r}")

Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: accepted at 2025-02-25T15:26:25.427118
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:26:35.536217
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:26:46.631472
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:26:58.810546
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:27:12.176424
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:27:26.880065
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:27:43.080238
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:28:00.874984
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:28:20.431197
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:28:41.922779
Job: 131fdde7-692a-5d79-9070-fa2c2b9a66a8 - Status: running at 2025-02-25T15:29:05.602277
Job: 131f

## Errors

### 30UVB

In [24]:
job = 'fe5d37a1-2f8f-53c2-a484-83b206d4fce3'

In [27]:
api = API()

In [28]:
api.get_job_logs(job=job)

[LogEntry(timestamp=datetime.datetime(2025, 2, 13, 15, 50, 39, 678416, tzinfo=datetime.timezone.utc), message='time="2025-02-13T15:50:39.677Z" level=info msg="Starting Workflow Executor" version=v3.5.7'),
 LogEntry(timestamp=datetime.datetime(2025, 2, 13, 15, 50, 39, 681411, tzinfo=datetime.timezone.utc), message='time="2025-02-13T15:50:39.681Z" level=info msg="Using executor retry strategy" Duration=1s Factor=1.6 Jitter=0.5 Steps=5'),
 LogEntry(timestamp=datetime.datetime(2025, 2, 13, 15, 50, 39, 681418, tzinfo=datetime.timezone.utc), message='time="2025-02-13T15:50:39.681Z" level=info msg="Executor initialized" deadline="0001-01-01 00:00:00 +0000 UTC" includeScriptOutput=false namespace=ws-serverless podName=workflow-b6hsk-stage-in-1503837973 templateName=stage-in version="&Version{Version:v3.5.7,BuildDate:2024-05-27T06:18:59Z,GitCommit:503eef1357ebc9facc3f463708031441072ef7c2,GitTag:v3.5.7,GitTreeState:clean,GoVersion:go1.21.10,Compiler:gc,Platform:linux/amd64,}"'),
 LogEntry(timest

### 30UUC

In [33]:
api = API()

In [34]:
# List the jobs returned by the API to find the job_id of the failed run.
jobs = api.get_jobs()
jobs

[Job(api=<eocanvas.api.API object at 0x7fa64ac19690>, job_id='285fc5da-0f48-5e78-82c9-786112eecd1e', status='successful', started='2025-02-25 14:19:01', created='2025-02-25 14:19:01', updated='2025-02-25 14:22:10', finished='2025-02-25 14:22:09'),
 Job(api=<eocanvas.api.API object at 0x7fa64ac19690>, job_id='d891ce4a-e9e9-565f-9e52-79912836a970', status='failed', started='2025-02-25 13:48:32', created='2025-02-25 13:48:32', updated='2025-02-25 14:18:11', finished='2025-02-25 14:18:10'),
 Job(api=<eocanvas.api.API object at 0x7fa64ac19690>, job_id='cb1b68ce-5c45-5e51-b0cf-18070da6a458', status='successful', started='2025-02-14 16:31:39', created='2025-02-14 16:31:39', updated='2025-02-14 16:40:09', finished='2025-02-14 16:40:08'),
 Job(api=<eocanvas.api.API object at 0x7fa64ac19690>, job_id='4f53dd10-394e-5454-b20d-c969c495da75', status='successful', started='2025-02-14 16:14:44', created='2025-02-14 16:14:44', updated='2025-02-14 16:29:37', finished='2025-02-14 16:29:35'),
 Job(api=<eo

In [35]:
job = 'd891ce4a-e9e9-565f-9e52-79912836a970'

In [36]:
api.get_job_logs(job=job)

[LogEntry(timestamp=datetime.datetime(2025, 2, 25, 13, 48, 48, 880881, tzinfo=datetime.timezone.utc), message='time="2025-02-25T13:48:48.880Z" level=info msg="Starting Workflow Executor" version=v3.5.7'),
 LogEntry(timestamp=datetime.datetime(2025, 2, 25, 13, 48, 48, 883788, tzinfo=datetime.timezone.utc), message='time="2025-02-25T13:48:48.883Z" level=info msg="Using executor retry strategy" Duration=1s Factor=1.6 Jitter=0.5 Steps=5'),
 LogEntry(timestamp=datetime.datetime(2025, 2, 25, 13, 48, 48, 883814, tzinfo=datetime.timezone.utc), message='time="2025-02-25T13:48:48.883Z" level=info msg="Executor initialized" deadline="0001-01-01 00:00:00 +0000 UTC" includeScriptOutput=false namespace=ws-serverless podName=workflow-xqzrp-stage-in-3964513566 templateName=stage-in version="&Version{Version:v3.5.7,BuildDate:2024-05-27T06:18:59Z,GitCommit:503eef1357ebc9facc3f463708031441072ef7c2,GitTag:v3.5.7,GitTreeState:clean,GoVersion:go1.21.10,Compiler:gc,Platform:linux/amd64,}"'),
 LogEntry(timest