# SISTER workflow

In [3]:
import json
import os
import xml.etree.ElementTree as ET
import pandas as pd
import IPython
from itertools import groupby

# Import warnings module and ignore warnings in output below
import warnings
warnings.filterwarnings("ignore")

# Import and initialize MAAP class
from maap.maap import MAAP
maap = MAAP(maap_host="sister-api.imgspec.org")

### Create unique scene identifier

In [4]:
scenes = []

In [5]:
granules = ['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20220810T174213_2222212_001/EMIT_L1B_RAD_001_20220810T174213_2222212_001.nc',
            'https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/raw/PRS_L1_STD_OFFL_20210730211000_20210730211004_0001.zip',
            'https://popo.jpl.nasa.gov/avcl/y18_data/f180126t01p00r08.tar.gz',
            'https://popo.jpl.nasa.gov/avng/y22/ang20220529t184338.tar.gz',
            'https://sister-ops-workspace.s3.us-west-2.amazonaws.com/desis/raw/DESIS-HSI-L1C-DT0685618328_002-20220204T204959-V0215.zip']

meta = '_aquatic_scenes_test'

crid = "500"

for l1_granule in granules:
    
    landsat = 'None'   

    base_name = os.path.basename(l1_granule)

    if base_name.startswith('DESIS'):
        sensor = 'DESIS'
        datetime = base_name[31:46]

    elif base_name.startswith('PRS'):
        sensor = 'PRISMA'
        datetime = base_name[16:24] + 'T' + base_name[24:30]
        landsat='https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/landsat_reference/PRS_%s_landsat.tar.gz' % base_name[16:50]

    elif base_name.startswith('ang'):
        sensor = 'AVNG'
        datetime = base_name[3:18]

    elif base_name.startswith('f'):
        sensor = 'AVCL'
        ''' AVIRIS classic filenames do not contain acquisition times,to be consistent with other
            sensors and to ensure identifier codes are unique a time string is created using other
            numbers in the filename            
        '''     

        datetime = "20%sT%s%s%s" % (base_name[1:7],
                                    base_name[8:10],
                                    base_name[11:13],
                                    base_name[14:16])
        
    elif base_name.startswith("EMIT"):
        sensor = 'EMIT'
        datetime = base_name.split('_')[4]
        
    else:
        raise ValueError('Unrecognized L1 datafile')

    job_args = {'sensor': sensor,
                'datetime': datetime,
                 'crid' : crid}
    job_args['preprocess'] = {'raw_dataset': l1_granule,
                              'landsat_dataset' : landsat}

    print(job_args)
    
    scenes.append(job_args)


{'sensor': 'EMIT', 'datetime': '20220810T174213', 'crid': '500', 'preprocess': {'raw_dataset': 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20220810T174213_2222212_001/EMIT_L1B_RAD_001_20220810T174213_2222212_001.nc', 'landsat_dataset': 'None'}}
{'sensor': 'PRISMA', 'datetime': '20210730T211000', 'crid': '500', 'preprocess': {'raw_dataset': 'https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/raw/PRS_L1_STD_OFFL_20210730211000_20210730211004_0001.zip', 'landsat_dataset': 'https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/landsat_reference/PRS_20210730211000_20210730211004_0001_landsat.tar.gz'}}
{'sensor': 'AVCL', 'datetime': '20180126T010008', 'crid': '500', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y18_data/f180126t01p00r08.tar.gz', 'landsat_dataset': 'None'}}
{'sensor': 'AVNG', 'datetime': '20220529t184338', 'crid': '500', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avng/y22/ang

## Step 1. Preprocess

In [6]:
for i,scene in enumerate(scenes):
        
    if scene['sensor'] in ['AVCL','EMIT']:
        queue="sister-job_worker-32gb"
    else:
        queue="sister-job_worker-16gb"
    
    identifier = f'SISTER_{scene["sensor"]}_L1B_RDN_{scene["datetime"]}_{scene["crid"]}{meta}'
    print(f'Identifier: {identifier}')

    status = ''
    while status != 'success':
        preprocess_job_response = maap.submitJob(
            algo_id = "sister-preprocess",
            version = "sister-dev",
            raw_dataset = scene['preprocess']['raw_dataset'],
            crid = scene['crid'],
            publish_to_cmr = False,
            cmr_metadata={},
            queue=queue,
            identifier= identifier)
        status = preprocess_job_response.status
        print(f'Submission status: {status}')
        print(f'Job ID: {preprocess_job_response.id}')
          
    scene['preprocess']['job_id'] = preprocess_job_response.id
    

Identifier: SISTER_EMIT_L1B_RDN_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: f511fee6-98fb-44f1-8698-dc3384554ad6
Identifier: SISTER_PRISMA_L1B_RDN_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: 4b24ba6a-f3f6-4fee-8b2e-05ac0c76b0ab
Identifier: SISTER_AVCL_L1B_RDN_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: 058a832e-d205-4502-9992-aef3e2296417
Identifier: SISTER_AVNG_L1B_RDN_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: dfe87ac1-478a-4efd-9256-5a3b12fb4eda
Identifier: SISTER_DESIS_L1B_RDN_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: 68cd510a-33d0-41ac-b9ea-efd38d6f711c


## Step 2. ISOFIT


In [7]:
for i,scene in enumerate(scenes):
        
    if scene['sensor'] == 'AVCL':
        segmentation_size = 100
    else:
        segmentation_size = 40
    
    identifier = f'SISTER_{scene["sensor"]}_L2A_RFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    print(f'Identifier: {identifier}')

    preprocess_id = scene['preprocess']['job_id'] 
    
    preprocess_result= [ x for x in maap.getJobResult(preprocess_id).outputs if x.startswith("s3://s3.") and "RDN" in x]
    preprocess_result.sort()
    l1b_rdn,l1b_loc, l1b_obs = preprocess_result
    
    scene['preprocess']['radiance_dataset'] =l1b_rdn
    scene['preprocess']['location_dataset'] =l1b_loc
    scene['preprocess']['observation_dataset'] =l1b_obs
    status = ''
    while status != 'success':
        isofit_job_response = maap.submitJob(
                                        algo_id="sister-isofit",
                                        version="sister-dev",
                                        radiance_dataset=l1b_rdn,
                                        location_dataset = l1b_loc,
                                        observation_dataset = l1b_obs,
                                        segmentation_size = segmentation_size,
                                        n_cores=32,
                                        crid = scene['crid'],
                                        publish_to_cmr=False,
                                        cmr_metadata={},
                                        queue="sister-job_worker-32gb",
                                        identifier= identifier)
        status = isofit_job_response.status
        print(f'Submission status: {status}')
        print(f'Job ID: {isofit_job_response.id}')

    scene['isofit']  = {'job_id' : isofit_job_response.id}


Identifier: SISTER_EMIT_L2A_RFL_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: 305f8934-55b0-45db-9c94-cd145f09d8de
Identifier: SISTER_PRISMA_L2A_RFL_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: c28aaa0e-a043-4b38-8cb8-15da6b4df306
Identifier: SISTER_AVCL_L2A_RFL_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: f1923b6f-4e1d-4196-a8d1-7c12291ced37
Identifier: SISTER_AVNG_L2A_RFL_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: e1035462-1c3e-485c-ba0b-3864549a608d
Identifier: SISTER_DESIS_L2A_RFL_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: f3a7e1c2-7a62-4d67-8c7a-6de3bc491df9


## Step 3. Spectral resample

In [10]:
for i,scene in enumerate(scenes):

    identifier = f'SISTER_{scene["sensor"]}_L2A_RSRFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    isofit_id = scene['isofit']['job_id'] 

    iso_result= [ x for x in maap.getJobResult(isofit_id).outputs if x.startswith("s3://s3.") and "RFL" in x]
    l2a_rfl,l2a_unc = iso_result
        
    scene['isofit']['reflectance_dataset'] =l2a_rfl
    scene['isofit']['uncertainty_dataset'] =l2a_unc

    resample_job_response = maap.submitJob(
                                            algo_id="sister-resample",
                                            version="sister-dev",
                                            reflectance_dataset= l2a_rfl,
                                            uncertainty_dataset= l2a_unc,
                                            crid = scene['crid'],
                                            publish_to_cmr=False,
                                            cmr_metadata={},
                                            queue="sister-job_worker-16gb",
                                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % resample_job_response.status)
    print('Job ID: %s' % resample_job_response.id)
    scene['resample']  = {'job_id' : resample_job_response.id}


Identifier: SISTER_EMIT_L2A_RSRFL_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: e8a804d4-14d0-444f-8e78-51e183f5b06d
Identifier: SISTER_PRISMA_L2A_RSRFL_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: 8a415118-f9ac-4c04-b2b1-4db618e1c251
Identifier: SISTER_AVCL_L2A_RSRFL_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: f118593f-d7b9-4947-a880-672a56a680d1
Identifier: SISTER_AVNG_L2A_RSRFL_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: d60c75eb-ac9d-4d35-a1d0-d4654a11941e
Identifier: SISTER_DESIS_L2A_RSRFL_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: fcd6fd20-2b53-401e-bdba-d74fc0b5a1bf


## Step 3. Reflectance correction

In [11]:
for i,scene in enumerate(scenes):

    identifier = f'SISTER_{scene["sensor"]}_L2A_CORFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    resample_id = scene['resample']['job_id'] 
    resample_result= [ x for x in maap.getJobResult(resample_id).outputs if x.startswith("s3://s3.") and "RSRFL" in x]
    l2a_rsrfl,l2a_rsunc = resample_result
    
    scene['resample']['reflectance_dataset'] =l2a_rsrfl
    scene['resample']['uncertainty_dataset'] =l2a_rsunc



    rfl_corr_job_response = maap.submitJob(
                                            algo_id="sister-reflect_correct",
                                            version="sister-dev",
                                            observation_dataset= scene['preprocess']['observation_dataset'],
                                            reflectance_dataset= l2a_rsrfl,
                                            crid = scene['crid'],
                                            publish_to_cmr=False,
                                            cmr_metadata={},
                                            queue="sister-job_worker-16gb",
                                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % rfl_corr_job_response.status)
    print('Job ID: %s' % rfl_corr_job_response.id)
    scene['reflect_correct']  = {'job_id' : rfl_corr_job_response.id}


Identifier: SISTER_EMIT_L2A_CORFL_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: 1e36877f-f23c-4d5d-abf5-d297140e864f
Identifier: SISTER_PRISMA_L2A_CORFL_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: 25f188c5-bbb1-42e4-96ed-b04659142e78
Identifier: SISTER_AVCL_L2A_CORFL_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: e2ff5c75-f0eb-460c-928d-1c6feb979d4d
Identifier: SISTER_AVNG_L2A_CORFL_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: 4eb4fd54-ceeb-4867-9f79-5a57d41991bb
Identifier: SISTER_DESIS_L2A_CORFL_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: 232b9e4c-0d67-48a3-807d-24137c782ac8


## Step 4. Fractional Cover

In [12]:
for i,scene in enumerate(scenes):

    identifier = f'SISTER_{scene["sensor"]}_L2B_FRCOVER_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    correct_id = scene['reflect_correct']['job_id'] 
    correct_result= [ x for x in maap.getJobResult(correct_id).outputs if x.startswith("s3://s3.") and "CORFL" in x]
    l2a_corfl =  correct_result[0]
    scene['reflect_correct']['reflectance_dataset'] = l2a_corfl

    frcover_job_response = maap.submitJob(
                                        algo_id="sister-fractional-cover",
                                        version="sister-dev",
                                        reflectance_dataset=l2a_corfl,
                                        n_cores= 32,
                                        refl_scale= 1,
                                        crid = scene['crid'],
                                        publish_to_cmr=False,
                                        cmr_metadata={},
                                        queue="sister-job_worker-32gb",
                                        identifier= identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % frcover_job_response.status)
    print('Job ID: %s' % frcover_job_response.id)
    scene['frcover']  = {'job_id' : frcover_job_response.id}

Identifier: SISTER_EMIT_L2B_FRCOVER_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: b25bb9ec-7ddc-4bfd-91a7-51513433abeb
Identifier: SISTER_PRISMA_L2B_FRCOVER_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: fb2273af-31b3-4b60-8072-c707f8a4e2fd
Identifier: SISTER_AVCL_L2B_FRCOVER_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: 15f90690-7e23-4b2e-9251-3b3fa7b3c3e3
Identifier: SISTER_AVNG_L2B_FRCOVER_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: a544c1f7-9e49-4e9e-8f5b-19d0b63816a3
Identifier: SISTER_DESIS_L2B_FRCOVER_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: 97c4ede5-730b-439b-a733-b4fac7223853


## Step 6a. Vegetation biochemistry

In [13]:
for i,scene in enumerate(scenes):

    identifier = f'SISTER_{scene["sensor"]}_L2B_VEGBIOCHEM_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    frcover_id = scene['frcover']['job_id'] 
    frcover_result= [x for x in maap.getJobResult(frcover_id).outputs if x.startswith("s3://s3.") and "FRCOV" in x]
    l2b_frcov =  frcover_result[0]                   
    scene['frcover']['frcover_dataset'] = l2b_frcov

    vegbiochem_job_response = maap.submitJob(
                            algo_id="sister-trait_estimate",
                            version="1.0.0",
                            reflectance_dataset= scene['reflect_correct']['reflectance_dataset'],
                            frcov_dataset=l2b_frcov,
                            veg_cover = 0.5,
                            crid = scene['crid'],
                            publish_to_cmr=False,
                            cmr_metadata={},
                            queue="sister-job_worker-32gb",
                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % vegbiochem_job_response.status)
    print('Job ID: %s' % vegbiochem_job_response.id)
    scene['vegbiochem']  = {'job_id' : vegbiochem_job_response.id}

Identifier: SISTER_EMIT_L2B_VEGBIOCHEM_20220810T174213_500_aquatic_scenes_test
Submission status: success
Job ID: 7f01cb36-e4c4-4462-bab3-e8adb7c7e967
Identifier: SISTER_PRISMA_L2B_VEGBIOCHEM_20210730T211000_500_aquatic_scenes_test
Submission status: success
Job ID: 84a4bc35-b376-4787-b5c4-f9ebf22e80e1
Identifier: SISTER_AVCL_L2B_VEGBIOCHEM_20180126T010008_500_aquatic_scenes_test
Submission status: success
Job ID: e1f5d19c-31f1-4d6e-a7c1-d2eea02246f4
Identifier: SISTER_AVNG_L2B_VEGBIOCHEM_20220529t184338_500_aquatic_scenes_test
Submission status: success
Job ID: 1a2c5336-fcf7-4b44-ad52-0a3e2aec9939
Identifier: SISTER_DESIS_L2B_VEGBIOCHEM_20220204T204959_500_aquatic_scenes_test
Submission status: success
Job ID: 1359d1fe-cedb-4a92-a439-ad965ca62c77


## Step 6b. Snow grainsize

In [25]:
for i,scene in enumerate(scenes):
    identifier = f'SISTER_{scene["sensor"]}_L2B_GRAINSIZE_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    frcover_id = scene['frcover']['job_id'] 
    frcover_result= [x for x in maap.getJobResult(frcover_id).outputs if x.startswith("s3://s3.") and "FRCOV" in x]
    l2b_frcov =  frcover_result[0]                   
    scene['frcover']['frcover_dataset'] = l2b_frcov

    grainsize_job_response = maap.submitJob(
        algo_id="sister-grainsize",
        version="1.0.0",
        reflectance_dataset=  scene['reflect_correct']['reflectance_dataset'],
        frcov_dataset= scene['frcover']['frcover_dataset'],
        snow_cover = 0.9,
        crid= scene['crid'],
        publish_to_cmr=False,
        cmr_metadata={},
        queue="sister-job_worker-16gb",
        identifier=identifier)
    
    print(f'Identifier: {identifier}')
    print('Submission status: %s' % grainsize_job_response.status)
    print('Job ID: %s' % grainsize_job_response.id)
    scene['grainsize']  = {'job_id' : grainsize_job_response.id}

Identifier: SISTER_AVCL_L2B_GRAINSIZE_20080706T010009_999_workflow_testing
Submission status: success
Job ID: 597363ae-50bf-4e3f-a7c1-f8b08956b6c8
Identifier: SISTER_AVNG_L2B_GRAINSIZE_20190718t165926_999_workflow_testing
Submission status: success
Job ID: 33daf9fb-432b-4b9c-923e-444e3d02b076
Identifier: SISTER_DESIS_L2B_GRAINSIZE_20190609T195930_999_workflow_testing
Submission status: success
Job ID: 13341715-f53c-48da-a86c-a5e8c8a46be5
Identifier: SISTER_PRISMA_L2B_GRAINSIZE_20221130T163741_999_workflow_testing
Submission status: success
Job ID: 3b40ff12-b4a5-412c-bb74-e415c61141f7
Identifier: SISTER_EMIT_L2B_GRAINSIZE_20230324T221148_999_workflow_testing
Submission status: success
Job ID: 025c410a-21cb-4880-a4b7-31a1ba635055
