# Test AWS Batch workflow

List of interferograms to process (A76, relative orbit 76):
* int-20191231-20180116
* int-20191231-20161228
* int-20191231-20150114

In [5]:
# if dinosar library not in base environment uncomment below (run just once)
!pip install --no-cache git+https://github.com/scottyhq/dinosar.git@master

Collecting git+https://github.com/scottyhq/dinosar.git@master
  Cloning https://github.com/scottyhq/dinosar.git (to revision master) to /tmp/pip-req-build-6hg4lf4u
  Running command git clone -q https://github.com/scottyhq/dinosar.git /tmp/pip-req-build-6hg4lf4u
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: dinosar
  Building wheel for dinosar (PEP 517) ... [?25ldone
[?25h  Created wheel for dinosar: filename=dinosar-0.0.0-py3-none-any.whl size=20268 sha256=642f01858025816f17cc23def9ad651cd339709b5dbdadb24ee270da213b699b
  Stored in directory: /tmp/pip-ephem-wheel-cache-xvr1vc46/wheels/92/b0/87/750fc305aa2cac58744b55287e227c6fbf08fb234cb46ca2da
Successfully built dinosar
Installing collected packages: dinosar
Successfully installed dinosar-0.0.0


In [6]:
import getpass
import json
import os
import subprocess

import dinosar
import geopandas as gpd

In [19]:
# Disabled alternative: presumably derives the bounding box from a GeoJSON
# outline instead of hard-coding it below (TODO confirm before re-enabling).
#gfa = gpd.read_file('apmb.geojson')
#gf = dinosar.archive.asf.load_inventory('query.geojson')
#snwe = dinosar.archive.asf.ogr2snwe('apmb.geojson')
#snwe

# Smaller SNWE, also in template.yml
# relOrbit is the relative orbit passed to the ASF query and to prep_topsApp_local (-p).
relOrbit=76
# Bounding box in [south, north, west, east] order (latitude/longitude degrees).
snwe = [-23.0, -21.5, -68, -66.5]

In [20]:
# Refresh the S1 archive inventory (query.geojson)
def refresh_inventory(snwe, orbit=relOrbit):
    """Re-query ASF for Sentinel-1A/B scenes and save the merged inventory.

    Parameters
    ----------
    snwe : list
        Bounding box, passed unchanged to ``dinosar.archive.asf.query_asf``.
    orbit : int, optional
        Relative orbit to query. Defaults to the notebook-level ``relOrbit``.
        This value is now honoured; previously the global ``relOrbit`` was
        always used regardless of what the caller passed.
    """
    # One query per platform: 'SA' (Sentinel-1A) and 'SB' (Sentinel-1B)
    for platform in ('SA', 'SB'):
        dinosar.archive.asf.query_asf(snwe, platform, orbit=orbit)
    # assumes query_asf writes query_<platform>.json into the working directory — TODO confirm
    gf = dinosar.archive.asf.merge_inventories('query_SA.json', 'query_SB.json')
    dinosar.archive.asf.save_inventory(gf)

In [21]:
# Optional: run this cell to fetch an up-to-date version of query.geojson
refresh_inventory(snwe, orbit=relOrbit)

Querying ASF Vertex for SA...
https://api.daac.asf.alaska.edu/services/search/param?intersectsWith=POLYGON+%28%28-66.5+-23%2C+-66.5+-21.5%2C+-68+-21.5%2C+-68+-23%2C+-66.5+-23%29%29&platform=SA&processingLevel=SLC&beamMode=IW&output=json&relativeOrbit=76
Querying ASF Vertex for SB...
https://api.daac.asf.alaska.edu/services/search/param?intersectsWith=POLYGON+%28%28-66.5+-23%2C+-66.5+-21.5%2C+-68+-21.5%2C+-68+-23%2C+-66.5+-23%29%29&platform=SB&processingLevel=SLC&beamMode=IW&output=json&relativeOrbit=76
Merging S1A and S1B inventories
Saved inventory:  query.geojson


## 2) Identify pairs to process

In [22]:
# Interferogram names follow the pattern 'int-<first date>-<second date>'.
# pandas could generate this list programmatically; for now just list a few dates.
reference_date = '20191231'
secondary_dates = ['20180116', '20161228', '20150114']
pairs = [f'int-{reference_date}-{secondary}' for secondary in secondary_dates]

## 3) Generate processing directories and push to S3

In [23]:
# Write the full S3 path of every pair to a text file (one per line, no trailing
# newline) and copy that file into the processing bucket.
bucket = 's3://dinosar/processing/uturuncu/A76-1'
pairsFile = 'pairs.txt'

paths = ['/'.join((bucket, name)) for name in pairs]
with open(pairsFile, 'w') as f:
    f.write('\n'.join(paths))

cmd = ' '.join(['aws', 's3', 'cp', pairsFile, f'{bucket}/{pairsFile}'])
print(cmd)
# Runs the copy immediately; the exit code (0 on success) is shown as the cell output
subprocess.call(cmd, shell=True)

aws s3 cp pairs.txt s3://dinosar/processing/uturuncu/A76-1/pairs.txt


0

In [24]:
# Read the S3 paths back from the pairs file and index them by line number.
# They go into their own name so that `pairs` keeps the bare pair names:
# overwriting `pairs` with full paths would make the cell that writes the pairs
# file prepend the bucket prefix a second time if it were re-run.
with open(pairsFile) as f:
    pair_paths = [line.rstrip() for line in f]
mapping = dict(enumerate(pair_paths))
mapping

{0: 's3://dinosar/processing/uturuncu/A76-1/int-20191231-20180116',
 1: 's3://dinosar/processing/uturuncu/A76-1/int-20191231-20161228',
 2: 's3://dinosar/processing/uturuncu/A76-1/int-20191231-20150114'}

In [25]:
# Run prep_topsApp_local once per pair (this executes the command immediately)
script = 'prep_topsApp_local'
template = 'templateA76.yml'
for i,p in enumerate(pairs):
    # basename() makes this work for bare pair names as well as full S3 paths
    intname = os.path.basename(p)
    prefix,master,slave=intname.split('-')
    cmd = f'{script} -i query.geojson -m {master} -p {relOrbit} -s {slave} -t {template}'
    print(i, cmd)
    returncode = subprocess.call(cmd, shell=True)
    if returncode != 0:
        # Keep going so the remaining pairs are still prepared, but do not fail silently
        print(f'WARNING: {script} exited with code {returncode} for {intname}')

0 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20180116 -t templateA76.yml
1 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20161228 -t templateA76.yml
2 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20150114 -t templateA76.yml


In [26]:
# Push each pair's local processing folder to cloud storage (S3)
failed = []
for p in pairs:
    # basename() makes this work for bare pair names as well as full S3 paths
    intname = os.path.basename(p)
    prefix,master,slave=intname.split('-')
    intdir = f'int-{master}-{slave}'
    cmd = f'aws s3 sync {intdir}/ {bucket}/{intdir}/'
    print(cmd)
    if subprocess.call(cmd, shell=True) != 0:
        failed.append(intdir)

# Only claim success when every sync exited cleanly
if failed:
    print(f'WARNING: aws s3 sync failed for: {", ".join(failed)}')
else:
    print(f'Moved files to {bucket}')

aws s3 sync int-20191231-20180116/ s3://dinosar/processing/uturuncu/A76-1/int-20191231-20180116/
aws s3 sync int-20191231-20161228/ s3://dinosar/processing/uturuncu/A76-1/int-20191231-20161228/
aws s3 sync int-20191231-20150114/ s3://dinosar/processing/uturuncu/A76-1/int-20191231-20150114/
Moved files to s3://dinosar/processing/uturuncu/A76-1


## 4) Launch AWS Batch (WARNING: can consume lots of AWS resources)

In [27]:
# Enter your NASA URS password to download SLCs
# Replace nasauser with your own URS username; the password is prompted for
# interactively rather than hard-coded in the notebook.
nasauser = 'scottyhq'
nasapass = getpass.getpass()

 ····················


In [28]:
# pick whatever makes sense
jobname = 'uturuncu-A76-c5'

# don't change these:
demDir = 's3://dinosar/processing/uturuncu/dem'
jobdef = 'uturuncu-array-c5d'
jobqueue = 'dinosar-c5d'
# One array child job per pair. AWS Batch requires an array size of at least 2.
array_size = len(pairs)


# NOTE: job-name, job-queue, and job-definition are JSON files that I've created for AWS Batch
# They specify the type of computers to use, etc.

# Credentials are passed as a JSON document instead of AWS shorthand syntax so that
# quotes, commas or braces in the password cannot break or alter the command.
container_overrides = json.dumps({
    'environment': [
        {'name': 'NASAUSER', 'value': nasauser},
        {'name': 'NASAPASS', 'value': nasapass},
    ]
})

# Argument list (no shell=True): each value reaches the AWS CLI exactly as written,
# with no shell quoting or expansion applied to it.
cmd = [
    'aws', 'batch', 'submit-job',
    '--job-name', jobname,
    '--job-queue', jobqueue,
    '--job-definition', jobdef,
    '--array-properties', f'size={array_size}',
    '--parameters', f'S3_PAIRS={bucket}/{pairsFile},S3_DEM={demDir}',
    '--container-overrides', container_overrides,
]

# warning: this prints your password as plain text, careful not to push to github
#print(cmd)

# Raises CalledProcessError if the submission fails; the JSON reply is the cell output
subprocess.check_output(cmd)

b'{\n    "jobName": "uturuncu-A76-c5",\n    "jobId": "08c34f2d-a363-458f-8f9b-8273a89548d7"\n}\n'

## 5) Wait for jobs to finish! 

* The Merged/ results folder corresponding to s3://dinosar/processing/uturuncu/A76 can be found in s3://dinosar/results/uturuncu/A76

* For now, monitor jobs here: https://us-west-2.console.aws.amazon.com/batch/home?region=us-west-2#/jobs/queue/arn:aws:batch:us-west-2:783380859522:job-queue~2Futuruncu-queue?state=PENDING