## Harmony Py Library
### Job Pause/Resume Example

In [1]:
import sys
sys.path.append('..')

# Install harmony-py requirements.  Not necessary if you ran `pip install harmony-py` in your kernel  
!{sys.executable} -m pip install -q -r ../requirements/core.txt

from harmony import BBox, Client, Collection, Request, Environment

In [2]:
# Create a client that talks to the UAT environment.
harmony_client = Client(env=Environment.UAT)  # assumes .netrc usage

# Build a request for the collection, capped at 101 results.
# NOTE(review): 101 is presumably chosen to be just large enough to trigger the
# auto-pause/preview behavior demonstrated in the following cells -- confirm threshold.
collection = Collection(id='C1234088182-EEDTEST')
request = Request(
    collection=collection,
    max_results=101
)

In [3]:
# submit an async request for processing and return the job_id
# big requests get automatically paused after generating a preview of the results
job_id = harmony_client.submit(request)

In [4]:
# checking the status of the job we see that it is 'previewing'
harmony_client.status(job_id)

{'status': 'previewing',
 'message': 'The job is generating a preview before auto-pausing. CMR query identified 176 granules, but the request has been limited to process only the first 101 granules because you requested 101 maxResults.',
 'progress': 0,
 'created_at': datetime.datetime(2022, 5, 9, 17, 19, 2, 791000, tzinfo=tzutc()),
 'updated_at': datetime.datetime(2022, 5, 9, 17, 19, 4, 857000, tzinfo=tzutc()),
 'created_at_local': '2022-05-09T13:19:02-04:00',
 'updated_at_local': '2022-05-09T13:19:04-04:00',
 'request': 'https://harmony.uat.earthdata.nasa.gov/C1234088182-EEDTEST/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&maxResults=101',
 'num_input_granules': 101}

In [5]:
# 'wait_for_processing()' will wait while the job is in the 'previewing' state, then
# warn that the job is paused before exiting
harmony_client.wait_for_processing(job_id, show_progress=True)

 [ Processing:   0% ] |                                                   | [\]
Job has been paused. Call `resume()` to resume.
 [ Processing: 100% ] |###################################################| [|]


In [6]:
# checking the status we see that the job is paused
harmony_client.status(job_id)

{'status': 'paused',
 'message': 'The job is paused.',
 'progress': 17,
 'created_at': datetime.datetime(2022, 5, 9, 17, 19, 2, 791000, tzinfo=tzutc()),
 'updated_at': datetime.datetime(2022, 5, 9, 17, 19, 30, 603000, tzinfo=tzutc()),
 'created_at_local': '2022-05-09T13:19:02-04:00',
 'updated_at_local': '2022-05-09T13:19:30-04:00',
 'request': 'https://harmony.uat.earthdata.nasa.gov/C1234088182-EEDTEST/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&maxResults=101',
 'num_input_granules': 101}

In [7]:
# 'download_all()' will not wait for paused jobs and just returns any available results.
# It warns that the job is paused, so only the results produced so far are counted here.
results = harmony_client.download_all(job_id, directory='/tmp', overwrite=True)
# Count the returned items without keeping them around.
count = sum(1 for _ in results)
print(f'Got {count} results')

Job has been paused. Call `resume()` to resume.


Got 18 results


In [8]:
# we can resume the job with 'resume()'
harmony_client.resume(job_id)

In [9]:
# checking the status we see that the job is running again
harmony_client.status(job_id)

{'status': 'running',
 'message': 'The job is being processed',
 'progress': 34,
 'created_at': datetime.datetime(2022, 5, 9, 17, 19, 2, 791000, tzinfo=tzutc()),
 'updated_at': datetime.datetime(2022, 5, 9, 17, 19, 46, 105000, tzinfo=tzutc()),
 'created_at_local': '2022-05-09T13:19:02-04:00',
 'updated_at_local': '2022-05-09T13:19:46-04:00',
 'request': 'https://harmony.uat.earthdata.nasa.gov/C1234088182-EEDTEST/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&maxResults=101',
 'num_input_granules': 101}

In [10]:
# we can pause the job with 'pause()'.
harmony_client.pause(job_id)

In [11]:
# checking the status we see that the job is paused again
harmony_client.status(job_id)

{'status': 'paused',
 'message': 'The job is paused.',
 'progress': 47,
 'created_at': datetime.datetime(2022, 5, 9, 17, 19, 2, 791000, tzinfo=tzutc()),
 'updated_at': datetime.datetime(2022, 5, 9, 17, 19, 51, 334000, tzinfo=tzutc()),
 'created_at_local': '2022-05-09T13:19:02-04:00',
 'updated_at_local': '2022-05-09T13:19:51-04:00',
 'request': 'https://harmony.uat.earthdata.nasa.gov/C1234088182-EEDTEST/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&maxResults=101',
 'num_input_granules': 101}

In [12]:
# We can resume the job again
harmony_client.resume(job_id)

In [13]:
# 'wait_for_processing()' will show resumed progress
harmony_client.wait_for_processing(job_id, show_progress=True)

 [ Processing: 100% ] |###################################################| [|]


In [14]:
# Now that the job has finished, 'download_all()' yields the complete result set.
results = harmony_client.download_all(job_id, directory='/tmp', overwrite=True)
# Tally the returned items by consuming the iterable once.
count = sum(1 for _ in results)
print(f'Got {count} results')

Got 101 results


In [15]:
# Attempting to pause a completed job will result in an error.
# The error is caught and printed so that "Restart & Run All" can continue past this cell.
try:
    harmony_client.pause(job_id)
except Exception as e:
    print(f'Exception: {e}')

Exception: ('Conflict', 'Error: Job status cannot be updated from successful to paused.')

In [16]:
# Attempting to resume a completed job will also result in an error.
# The error is caught and printed so that "Restart & Run All" can continue past this cell.
try:
    harmony_client.resume(job_id)
except Exception as e:
    print(f'Exception: {e}')

Exception: ('Conflict', 'Error: Job status is successful - only paused jobs can be resumed.')

In [17]:
# we can use the 'skip_preview' parameter to tell Harmony to skip the auto-pause/preview and just start running
harmony_client = Client(env=Environment.UAT)  # assumes .netrc usage

collection = Collection(id='C1234088182-EEDTEST')
request = Request(
    collection=collection,
    max_results=101,
    skip_preview=True
)

In [18]:
# submit an async request for processing and return the job_id
# because the request was built with 'skip_preview=True', the job is not auto-paused for a preview
job_id = harmony_client.submit(request)

In [19]:
# checking the status we see that the job is running
harmony_client.status(job_id)

{'status': 'running',
 'message': 'CMR query identified 176 granules, but the request has been limited to process only the first 101 granules because you requested 101 maxResults.',
 'progress': 0,
 'created_at': datetime.datetime(2022, 5, 9, 17, 21, 56, 118000, tzinfo=tzutc()),
 'updated_at': datetime.datetime(2022, 5, 9, 17, 21, 58, 361000, tzinfo=tzutc()),
 'created_at_local': '2022-05-09T13:21:56-04:00',
 'updated_at_local': '2022-05-09T13:21:58-04:00',
 'request': 'https://harmony.uat.earthdata.nasa.gov/C1234088182-EEDTEST/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&maxResults=101&skipPreview=true',
 'num_input_granules': 101}

In [20]:
# we can now use 'wait_for_processing()' to wait until the job completes
harmony_client.wait_for_processing(job_id, show_progress=True)

 [ Processing: 100% ] |###################################################| [|]
