# SISTER CWL submission

In [38]:
import json
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Silence every Python warning for the rest of the session so that warnings
# do not clutter the cell outputs below
import warnings
warnings.filterwarnings("ignore")

# Import the MAAP client class and create a client for the SISTER API host;
# the `maap` object is used by the job submission cells further down
from maap.maap import MAAP
maap = MAAP(maap_host="sister-api.imgspec.org")

## Load production list with scene URLs


In [39]:
# Read the production list; the 'utf-8-sig' codec drops a byte-order mark
# at the very start of the file
scene_df = pd.read_csv(
    './sister_test_production_list.csv',
    encoding='utf-8-sig',
)
# Remove any byte-order-mark characters still present inside the granule URLs
scene_df['l1_granule'] = scene_df['l1_granule'].map(
    lambda url: url.replace('\ufeff', '')
)
scene_df.head()

Unnamed: 0,l1_granule
0,https://sister-ops-workspace.s3.us-west-2.amaz...
1,https://sister-ops-workspace.s3.us-west-2.amaz...
2,https://avng.jpl.nasa.gov/avng/y20_data/ang202...
3,https://popo.jpl.nasa.gov/avcl/y11_data/f11062...


## Workflow configuration generator

The configuration generator function takes the URL of an L1 granule as input and returns a scene identifier together with the workflow configuration for that scene, serialized as a JSON string.

In [44]:
def gen_config(l1_granule):

    '''Generate the CWL workflow configuration for one L1 scene.

    The sensor and the acquisition time are read from fixed character
    positions of the granule file name, so the file name has to follow the
    naming pattern of one of the sensors handled below.

    Arguments:

    l1_granule (str): Input scene URL

    Returns:

    identifier (str): Unique scene identification code

            SISTER_SENSOR_YYYYMMDDTHHMMSS
        ex:

            SISTER_PRISMA_20200918T100312

    input_config (str): JSON-formatted list of workflow PGE configurations

    Raises:

    ValueError: If the file name does not start with a recognized sensor prefix

    '''

    # Only the PRISMA branch sets a Landsat reference; every other sensor
    # passes the literal string 'None' to the preprocess step
    landsat = 'None'

    base_name = os.path.basename(l1_granule)

    if base_name.startswith('DESIS'):
        sensor = 'DESIS'
        acq_time = base_name[31:46]

    elif base_name.startswith('PRS'):
        sensor = 'PRISMA'
        acq_time = base_name[16:24] + 'T' + base_name[24:30]
        landsat = 's3://sister-ops-workspace/prisma/landsat_reference/PRS_%s_landsat.tar.gz' % base_name[16:50]

    elif base_name.startswith('ang'):
        sensor = 'AVNG'
        acq_time = base_name[3:18].upper()

    elif base_name.startswith('f'):
        sensor = 'AVCL'
        # AVIRIS classic filenames do not contain acquisition times; to be
        # consistent with other sensors and to ensure identifier codes are
        # unique, a time string is created using other numbers in the filename
        acq_time = "20%sT%s%s%s" % (base_name[1:7],
                                    base_name[8:10],
                                    base_name[11:13],
                                    base_name[14:16])

    else:
        # Without this branch an unknown prefix would leave the sensor and time
        # unassigned and fail later with an unhelpful UnboundLocalError
        raise ValueError(
            "Unrecognized L1 granule file name %r: expected a name starting "
            "with 'DESIS', 'PRS', 'ang' or 'f'" % base_name)

    identifier = 'SISTER_%s_%s' % (sensor, acq_time)

    # A null granule parameter is paired with an "input_filter" glob pattern
    # in the same step
    input_config = [
    {
      "step_key": "l1_preprocess",
      "algorithm_id": "sister-preprocess_ubuntu",
      "version": "sister-dev",
      "queue": "sister-job_worker-32gb",
      "params": {
        "l1_granule": l1_granule,
        "landsat": landsat,
        "publish_to_cmr": False,
        "cmr_metadata": {},
        "identifier": "%s_L1B_RDN" % identifier
      }
    },
    {
      "step_key": "l2_reflectance",
      "algorithm_id": "isofit_ubuntu",
      "queue": "sister-job_worker-32gb",
      "version": "sister-dev",
      "input_filter": {
        "l1b_granule": "*RDN*.tar.gz"
      },
      "params": {
        "cmr_metadata": {},
        "l1b_granule": None,
        "n_cores": 32,
        "publish_to_cmr": False,
        "segmentation_size": 50,
        "snow_and_liquids_reflectance_spectra": "https://ecosis.org/api/package/emit-manually-adjusted-snow-and-liquids-reflectance-spectra/export",
        "surface_reflectance_spectra": "https://ecosis.org/api/package/emit-manually-adjusted-surface-reflectance-spectra/export",
        "vegetation_reflectance_spectra": "https://ecosis.org/api/package/emit-manually-adjusted-vegetation-reflectance-spectra/export",
        "water_reflectance_spectra": "https://ecosis.org/api/package/emit-manually-adjusted-water-reflectance-spectra/export"
      },
      # NOTE(review): unlike the other three steps, "identifier" sits next to
      # "params" here instead of inside it -- confirm this placement is what
      # the workflow expects before moving it
      "identifier": "%s_L2A_RFL" % identifier
    },
    {
      "step_key": "l2_resample",
      "algorithm_id": "sister-resample_ubuntu",
      "version": "sister-dev",
      "queue": "sister-job_worker-32gb",
      "input_filter": {
        "l2a_granule": "*RFL*.tar.gz"
      },
      "params": {
        "l2a_granule": None,
        "publish_to_cmr": False,
        "cmr_metadata": {},
        "identifier": "%s_L2A_RSRFL" % identifier
      }
    },
    {
      "step_key": "l2_reflectance_correction",
      "algorithm_id": "sister-reflect_correct_ubuntu",
      "version": "sister-dev",
      "queue": "sister-job_worker-32gb",
      "input_filter": {
        "l1b_granule": "*RDN*.tar.gz",
        "l2a_granule": "*RSRFL*.tar.gz"
      },
      "params": {
        "l1b_granule": None,
        "l2a_granule": None,
        "publish_to_cmr": False,
        "cmr_metadata": {},
        "identifier": "%s_L2A_CORFL" % identifier
      }
    }
  ]

    return identifier, json.dumps(input_config, indent=4)
    

## Generate single CWL workflow configuration

In [45]:
# Build the configuration for the scene stored under index label 1
identifier, input_config = gen_config(scene_df.loc[1, 'l1_granule'])
# Show the scene identifier followed by the JSON workflow configuration
print(identifier, input_config, sep='\n')


SISTER_PRISMA_20200911T184039
[
    {
        "step_key": "l1_preprocess",
        "algorithm_id": "sister-preprocess_ubuntu",
        "version": "sister-dev",
        "queue": "sister-job_worker-32gb",
        "params": {
            "l1_granule": "https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/raw/PRS_L1_STD_OFFL_20200911184039_20200911184043_0001.zip",
            "landsat": "s3://sister-ops-workspace/prisma/landsat_reference/PRS_20200911184039_20200911184043_0001_landsat.tar.gz",
            "publish_to_cmr": false,
            "cmr_metadata": {},
            "identifier": "SISTER_PRISMA_20200911T184039_L1B_RDN"
        }
    },
    {
        "step_key": "l2_reflectance",
        "algorithm_id": "isofit_ubuntu",
        "queue": "sister-job_worker-32gb",
        "version": "sister-dev",
        "input_filter": {
            "l1b_granule": "*RDN*.tar.gz"
        },
        "params": {
            "cmr_metadata": {},
            "l1b_granule": null,
            "n_cor

## Submit single CWL workflow job

In [46]:
# Submit the workflow for the scene configured in the previous cell; the job
# is named after the scene identifier
job = maap.submitJob(
    algo_id="run_sister_workflow_ubuntu",
    version="dev",
    queue="sister-job_worker-8gb",
    identifier=identifier + "_cwl_workflow",
    username="anonymous",
    workflow_config=input_config,
)

# Report which scene was submitted and the status of the submission
print("Submitted {} CWL worflow job".format(identifier))
print("    Submission status: %s" % job.status)


Submitted SISTER_PRISMA_20200911T184039 CWL worflow job
    Submission status: success


## Loop through the dataframe and start a CWL workflow for each input scene

In [43]:
# Maps each scene file name to the ID of the workflow job submitted for it
job_ids = {}

for l1_granule in scene_df['l1_granule']:
    identifier, input_config = gen_config(l1_granule)
    scene_name = os.path.basename(l1_granule)

    # One workflow job per scene, named after the scene identifier
    job = maap.submitJob(
        algo_id="run_sister_workflow_ubuntu",
        version="dev",
        queue="sister-job_worker-8gb",
        identifier=identifier + "_cwl_workflow",
        username="anonymous",
        workflow_config=input_config,
    )

    # Report the submission and remember the job ID for this scene
    print("Submitted {} CWL worflow job".format(identifier))
    print("    Submission status: %s" % job.status)
    job_ids[scene_name] = job.id

Submitted SISTER_DESIS_20220618T210853 CWL worflow job
    Submission status: success
Submitted SISTER_PRISMA_20200911T184039 CWL worflow job
    Submission status: success
Submitted SISTER_AVNG_20200712T215355 CWL worflow job
    Submission status: success
Submitted SISTER_AVCL_20110620T010006 CWL worflow job
    Submission status: success
