# Test AWS Batch workflow

List of interferograms to process:
A76 ints:
* int-20191231-20180116
* int-20191231-20161228
* int-20191231-20150114

In [1]:
# If the dinosar library is not installed in the current environment, uncomment the line below (run just once)
#!pip install --no-cache git+https://github.com/scottyhq/dinosar.git@master

In [26]:
import getpass
import json
import os
import subprocess

import dinosar
import geopandas as gpd

In [3]:
# Both the GeoDataFrame and the bounding box are derived from the same
# area-of-interest file.
aoi_file = 'apmb.geojson'

gfa = gpd.read_file(aoi_file)
# NOTE(review): presumably the ASF search results saved as GeoJSON -- confirm.
gf = dinosar.archive.asf.load_inventory('query.geojson')

# Bounds of the area of interest; judging by the name and the displayed
# values this is [south, north, west, east] -- TODO confirm against dinosar.
snwe = dinosar.archive.asf.ogr2snwe(aoi_file)
snwe

[-23.68699916256815,
 -21.29081519506481,
 -68.46349117379683,
 -65.27091018760244]

## 2) Identify pairs to process

In [32]:
# The pair list could be built programmatically (e.g. with pandas); for this
# test only a handful of pairs are listed by hand.
relOrbit = 76

# Every pair is named int-<first date>-<second date>; all three share the
# same first date.
pairs = [
    f'int-20191231-{second_date}'
    for second_date in ('20180116', '20161228', '20150114')
]

## 3) Generate processing directories and push to S3

In [27]:
# Write the full S3 path of every pair to a local text file (one path per
# line) and copy that file into the S3 processing prefix.
# The bucket itself must already exist; nothing here creates it.
bucket = 's3://dinosar/processing/uturuncu/A76'
pairsFile = 'pairs.txt'

paths = [f'{bucket}/{x}' for x in pairs]
with open(pairsFile, 'w') as f:
    f.write('\n'.join(paths))

cmd = f'aws s3 cp {pairsFile} {bucket}/{pairsFile}'
print(cmd)
# check_call returns 0 on success (same cell output as before) but raises
# CalledProcessError on a non-zero exit status, so a failed upload stops a
# "Run All" instead of being silently carried into the Batch submission.
subprocess.check_call(cmd, shell=True)

aws s3 cp pairs.txt s3://dinosar/processing/uturuncu/A76/pairs.txt


0

In [19]:
# Read the uploaded list back in. Note that `pairs` now holds the full S3
# paths written to the file, one per line, with trailing whitespace removed.
with open(pairsFile) as handle:
    pairs = [entry.rstrip() for entry in handle]

# Line number -> S3 path, displayed as the cell output.
mapping = {index: path for index, path in enumerate(pairs)}
mapping

{0: 's3://dinosar/processing/uturuncu/A76/int-20191231-20180116',
 1: 's3://dinosar/processing/uturuncu/A76/int-20191231-20161228',
 2: 's3://dinosar/processing/uturuncu/A76/int-20191231-20150114'}

In [28]:
# Run the prep_topsApp_local command-line script once per pair. It is expected
# to create a local int-<master>-<slave>/ directory, which a later cell syncs
# to S3.
script = 'prep_topsApp_local'
template = 'template.yml'
for i, p in enumerate(pairs):
    # `pairs` may hold bare names or full S3 paths; basename keeps only the
    # final 'int-<master>-<slave>' component in both cases.
    intname = os.path.basename(p)
    _prefix, master, slave = intname.split('-')
    cmd = f'{script} -i query.geojson -m {master} -p {relOrbit} -s {slave} -t {template}'
    print(i, cmd)
    # Raise CalledProcessError on a non-zero exit status so a failed prep step
    # is not silently ignored.
    subprocess.check_call(cmd, shell=True)

0 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20180116 -t template.yml
1 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20161228 -t template.yml
2 prep_topsApp_local -i query.geojson -m 20191231 -p 76 -s 20150114 -t template.yml


In [29]:
# Push each local int-<master>-<slave>/ directory of text files to the S3
# processing prefix.
for p in pairs:
    # `pairs` may hold bare names or full S3 paths; only the last path
    # component carries the two dates.
    _prefix, master, slave = os.path.basename(p).split('-')
    intdir = f'int-{master}-{slave}'
    cmd = f'aws s3 sync {intdir}/ {bucket}/{intdir}/'
    print(cmd)
    # Raise CalledProcessError on a non-zero exit status, so the success
    # message below is only printed when every sync actually succeeded.
    subprocess.check_call(cmd, shell=True)

print(f'Moved files to {bucket}')

aws s3 sync int-20191231-20180116/ s3://dinosar/processing/uturuncu/A76/int-20191231-20180116/
aws s3 sync int-20191231-20161228/ s3://dinosar/processing/uturuncu/A76/int-20191231-20161228/
aws s3 sync int-20191231-20150114/ s3://dinosar/processing/uturuncu/A76/int-20191231-20150114/
Moved files to s3://dinosar/processing/uturuncu/A76


## 4) Launch AWS Batch (WARNING: can consume lots of AWS resources)

In [12]:
# NASA URS login used to download SLCs. The password is requested through an
# interactive prompt and is not echoed, so it does not appear in the cell.
nasapass = getpass.getpass()
nasauser = 'scottyhq'

 ····················


In [31]:
# Submit one AWS Batch array job with one child job per line of the pairs file.

# pick whatever makes sense
jobname = 'uturuncu-A76-test'

# don't change these:
demDir = 's3://dinosar/processing/uturuncu/dem'
jobdef = 'uturuncu-array'
jobqueue = 'uturuncu-queue'
array_size = len(pairs)

# The credentials are passed to the container as environment variables.
# Encode them as JSON instead of the AWS CLI shorthand syntax, so a password
# containing quotes, commas, braces or spaces cannot corrupt the argument.
container_overrides = json.dumps({
    'environment': [
        {'name': 'NASAUSER', 'value': nasauser},
        {'name': 'NASAPASS', 'value': nasapass},
    ]
})

# NOTE: job-name, job-queue, and job-definition are JSON files that I've created for AWS Batch
# They specify the type of computers to use, etc.
# The command is an argument list and runs without a shell, so no value
# (in particular the password) is ever interpreted by the shell.
cmd = [
    'aws', 'batch', 'submit-job',
    '--job-name', jobname,
    '--job-queue', jobqueue,
    '--job-definition', jobdef,
    '--array-properties', f'size={array_size}',
    '--parameters', f'S3_PAIRS={bucket}/{pairsFile},S3_DEM={demDir}',
    '--container-overrides', container_overrides,
]

# warning: this prints your password as plain text, careful not to push to github
#print(cmd)

# Returns the CLI's JSON response as bytes; raises CalledProcessError if the
# submission fails.
subprocess.check_output(cmd)

b'{\n    "jobName": "uturuncu-A76-test",\n    "jobId": "b871173f-d102-4062-b7f9-1c352ad3e71b"\n}\n'

## 5) Wait for jobs to finish! 

* The merged/ results folder corresponding to s3://dinosar/processing/uturuncu/A76 can be found in s3://dinosar/results/uturuncu/A76

* For now, monitor jobs here: https://us-west-2.console.aws.amazon.com/batch/home?region=us-west-2#/jobs/queue/arn:aws:batch:us-west-2:783380859522:job-queue~2Futuruncu-queue?state=PENDING