# SISTER workflow

In [38]:
import json
import os
import xml.etree.ElementTree as ET
import pandas as pd
import IPython
from itertools import groupby

# Import warnings module and ignore warnings in output below
import warnings
warnings.filterwarnings("ignore")

# Import and initialize MAAP class
from maap.maap import MAAP
maap = MAAP(maap_host="sister-api.imgspec.org")

### Create unique scene identifier

In [45]:
scenes = []

In [46]:
granules = ['https://popo.jpl.nasa.gov/avcl/y11_data/f110816t01p00r08.tar.gz',
            'https://popo.jpl.nasa.gov/avcl/y11_data/f110814t01p00r16.tar.gz',
            'https://popo.jpl.nasa.gov/avcl/y11_data/f110810t01p00r07.tar.gz',
            'https://popo.jpl.nasa.gov/avcl/y10_data/f100826t01p00r07.tar.gz',
            'https://popo.jpl.nasa.gov/avcl/y09_data/f090729t01p00r05.tar.gz']
meta = '_NETWORKTEST'

crid = "993"

for l1_granule in granules:
    
    landsat = 'None'   

    base_name = os.path.basename(l1_granule)

    if base_name.startswith('DESIS'):
        sensor = 'DESIS'
        datetime = base_name[31:46]

    elif base_name.startswith('PRS'):
        sensor = 'PRISMA'
        datetime = base_name[16:24] + 'T' + base_name[24:30]
        landsat='https://sister-ops-workspace.s3.us-west-2.amazonaws.com/prisma/landsat_reference/PRS_%s_landsat.tar.gz' % base_name[16:50]

    elif base_name.startswith('ang'):
        sensor = 'AVNG'
        datetime = base_name[3:18]

    elif base_name.startswith('f'):
        sensor = 'AVCL'
        ''' AVIRIS classic filenames do not contain acquisition times,to be consistent with other
            sensors and to ensure identifier codes are unique a time string is created using other
            numbers in the filename            
        '''     

        datetime = "20%sT%s%s%s" % (base_name[1:7],
                                    base_name[8:10],
                                    base_name[11:13],
                                    base_name[14:16])
    else:
        raise ValueError('Unrecognized L1 datafile')

    job_args = {'sensor': sensor,
                'datetime': datetime,
                 'crid' : crid}
    job_args['preprocess'] = {'raw_dataset': l1_granule,
                              'landsat_dataset' : landsat}

    print(job_args)
    
    scenes.append(job_args)


{'sensor': 'AVCL', 'datetime': '20110816T010008', 'crid': '993', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y11_data/f110816t01p00r08.tar.gz', 'landsat_dataset': 'None'}}
{'sensor': 'AVCL', 'datetime': '20110814T010016', 'crid': '993', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y11_data/f110814t01p00r16.tar.gz', 'landsat_dataset': 'None'}}
{'sensor': 'AVCL', 'datetime': '20110810T010007', 'crid': '993', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y11_data/f110810t01p00r07.tar.gz', 'landsat_dataset': 'None'}}
{'sensor': 'AVCL', 'datetime': '20100826T010007', 'crid': '993', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y10_data/f100826t01p00r07.tar.gz', 'landsat_dataset': 'None'}}
{'sensor': 'AVCL', 'datetime': '20090729T010005', 'crid': '993', 'preprocess': {'raw_dataset': 'https://popo.jpl.nasa.gov/avcl/y09_data/f090729t01p00r05.tar.gz', 'landsat_dataset': 'None'}}


## Step 1. Preprocess

In [48]:
preprocess_job_response

{'job_id': '', 'status': 'failed', 'machine_type': None, 'architecture': None, 'machine_memory_size': None, 'directory_size': None, 'operating_system': None, 'job_start_time': None, 'job_end_time': None, 'job_duration_seconds': None, 'cpu_usage': None, 'cache_usage': None, 'mem_usage': None, 'max_mem_usage': None, 'swap_usage': None, 'read_io_stats': None, 'write_io_stats': None, 'sync_io_stats': None, 'async_io_stats': None, 'total_io_stats': None, 'error_details': None, 'response_code': 502, 'outputs': []}

In [47]:
for i,scene in enumerate(scenes):
        
    if scene['sensor'] == 'AVCL':
        queue="sister-job_worker-32gb"
    else:
        queue="sister-job_worker-16gb"
    
    identifier = f'SISTER_{scene["sensor"]}_L1B_RDN_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    preprocess_job_response = maap.submitJob(
        algo_id = "sister-preprocess",
        version = "2.0.0",
        raw_dataset = scene['preprocess']['raw_dataset'],
        landsat_dataset = scene['preprocess']['landsat_dataset'],
        crid = scene['crid'],
        publish_to_cmr = False,
        cmr_metadata={},
        queue=queue,
        identifier= identifier)

    print(f'Identifier: {identifier}')
    print(f'Submission status: {preprocess_job_response.status}')
    print(f'Job ID: {preprocess_job_response.id}')
          
    scene['preprocess']['job_id'] = preprocess_job_response.id
    

Identifier: SISTER_AVCL_L1B_RDN_20110816T010008_993_NETWORKTEST
Submission status: failed
Job ID: 
Identifier: SISTER_AVCL_L1B_RDN_20110814T010016_993_NETWORKTEST
Submission status: failed
Job ID: 
Identifier: SISTER_AVCL_L1B_RDN_20110810T010007_993_NETWORKTEST
Submission status: success
Job ID: 886b584c-8cb5-4133-9d68-f7d424826eb0
Identifier: SISTER_AVCL_L1B_RDN_20100826T010007_993_NETWORKTEST
Submission status: success
Job ID: 6853e9c7-0d79-4948-bdb5-4ef3796e8029
Identifier: SISTER_AVCL_L1B_RDN_20090729T010005_993_NETWORKTEST
Submission status: failed
Job ID: 


## Step 2. ISOFIT


In [35]:
for i,scene in enumerate(scenes[:1]):
        
    if scene['sensor'] == 'AVCL':
        segmentation_size = 100
    else:
        segmentation_size = 25
    
    identifier = f'SISTER_{scene["sensor"]}_L2A_RFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    preprocess_id = scene['preprocess']['job_id'] 
    
    preprocess_result= [ x for x in maap.getJobResult(preprocess_id).outputs if x.startswith("s3://s3.") and "RDN" in x]
    preprocess_result.sort()
    l1b_rdn,l1b_loc, l1b_obs = preprocess_result
    
    scene['preprocess']['radiance_dataset'] =l1b_rdn
    scene['preprocess']['location_dataset'] =l1b_loc
    scene['preprocess']['observation_dataset'] =l1b_obs

    isofit_job_response = maap.submitJob(
                                    algo_id="sister-isofit",
                                    version="sister-dev",
                                    radiance_dataset=l1b_rdn,
                                    location_dataset = l1b_loc,
                                    observation_dataset = l1b_obs,
                                    segmentation_size = segmentation_size,
                                    n_cores=32,
                                    crid = scene['crid'],
                                    publish_to_cmr=False,
                                    cmr_metadata={},
                                    queue="sister-job_worker-32gb",
                                    identifier= identifier)

    print(f'Identifier: {identifier}')
    print(f'Submission status: {isofit_job_response.status}')
    print(f'Job ID: {isofit_job_response.id}')
    
    scene['isofit']  = {'job_id' : isofit_job_response.id}


Identifier: SISTER_AVCL_L2A_RFL_20110814T010016_997_SBG_GLEON
Submission status: success
Job ID: 071976ed-8af1-410e-ac10-aeba44d169eb


In [19]:
scenes[1]['preprocess']['job_id'] = 'a7d9634f-672c-450c-bfcf-d6e27eea91ae'

## Step 3. Spectral resample

In [22]:
for i,scene in enumerate(scenes[:2]):

    identifier = f'SISTER_{scene["sensor"]}_L2A_RSRFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    isofit_id = scene['isofit']['job_id'] 

    iso_result= [ x for x in maap.getJobResult(isofit_id).outputs if x.startswith("s3://s3.") and "RFL" in x]
    l2a_rfl,l2a_unc = iso_result
        
    scene['isofit']['reflectance_dataset'] =l2a_rfl
    scene['isofit']['uncertainty_dataset'] =l2a_unc

    resample_job_response = maap.submitJob(
                                            algo_id="sister-resample",
                                            version="sister-dev",
                                            reflectance_dataset= l2a_rfl,
                                            uncertainty_dataset= l2a_unc,
                                            crid = scene['crid'],
                                            publish_to_cmr=False,
                                            cmr_metadata={},
                                            queue="sister-job_worker-32gb",
                                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % resample_job_response.status)
    print('Job ID: %s' % resample_job_response.id)
    scene['resample']  = {'job_id' : resample_job_response.id}


Identifier: SISTER_AVCL_L2A_RSRFL_20110816T010008_999_SBG_GLEON
Submission status: success
Job ID: 977e88a2-6054-4a5a-b664-42b157b14d3f
Identifier: SISTER_AVCL_L2A_RSRFL_20110814T010016_999_SBG_GLEON
Submission status: success
Job ID: ac84c589-7b3b-4169-9dc5-64471a6832d0


## Step 3. Reflectance correction

In [23]:
for i,scene in enumerate(scenes[:1]):

    identifier = f'SISTER_{scene["sensor"]}_L2A_CORFL_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    resample_id = scene['resample']['job_id'] 
    resample_result= [ x for x in maap.getJobResult(resample_id).outputs if x.startswith("s3://s3.") and "RSRFL" in x]
    l2a_rsrfl,l2a_rsunc = resample_result
    
    scene['resample']['reflectance_dataset'] =l2a_rsrfl
    scene['resample']['uncertainty_dataset'] =l2a_rsunc



    rfl_corr_job_response = maap.submitJob(
                                            algo_id="sister-reflect_correct",
                                            version="2.0.0",
                                            observation_dataset= scene['preprocess']['observation_dataset'],
                                            reflectance_dataset= l2a_rsrfl,
                                            crid = scene['crid'],
                                            publish_to_cmr=False,
                                            cmr_metadata={},
                                            queue="sister-job_worker-32gb",
                                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % rfl_corr_job_response.status)
    print('Job ID: %s' % rfl_corr_job_response.id)
    scene['reflect_correct']  = {'job_id' : rfl_corr_job_response.id}


Identifier: SISTER_PRISMA_L2A_CORFL_20201225T185042_999
Submission status: success
Job ID: 0c041470-c7d1-49f1-a4e9-4bce52a20de3


## Step 4. Fractional Cover

In [62]:
for i,scene in enumerate(scenes[1:3]):

    identifier = f'SISTER_{scene["sensor"]}_L2B_FRCOVER_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    correct_id = scene['reflect_correct']['job_id'] 
    correct_result= [ x for x in maap.getJobResult(correct_id).outputs if x.startswith("s3://s3.") and "CORFL" in x]
    l2a_corfl =  correct_result[0]
    scene['reflect_correct']['reflectance_dataset'] = l2a_corfl

    frcover_job_response = maap.submitJob(
                                        algo_id="sister-fractional-cover",
                                        version="1.0.0",
                                        reflectance_dataset=l2a_corfl,
                                        n_cores= 20,
                                        refl_scale= 1,
                                        normalization = 'brightness',
                                        crid = scene['crid'],
                                        publish_to_cmr=False,
                                        cmr_metadata={},
                                        queue="sister-job_worker-32gb",
                                        identifier= identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % frcover_job_response.status)
    print('Job ID: %s' % frcover_job_response.id)
    scene['frcover']  = {'job_id' : frcover_job_response.id}

Identifier: SISTER_AVNG_L2B_FRCOVER_20210429t185927_001
Submission status: success
Job ID: 7aa78421-f947-40a3-815f-f62601a9d5a3
Identifier: SISTER_AVCL_L2B_FRCOVER_20130612T010012_001
Submission status: success
Job ID: ee051eb3-6de2-4c60-ade1-43ea0344df62


## Step 6a. Vegetation biochemistry

In [63]:
for i,scene in enumerate(scenes[2:3]):

    identifier = f'SISTER_{scene["sensor"]}_L2B_VEGBIOCHEM_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    frcover_id = scene['frcover']['job_id'] 
    frcover_result= [x for x in maap.getJobResult(frcover_id).outputs if x.startswith("s3://s3.") and "FRCOV" in x]
    l2b_frcov =  frcover_result[0]                   
    scene['frcover']['frcover_dataset'] = l2b_frcov

    vegbiochem_job_response = maap.submitJob(
                            algo_id="sister-trait_estimate",
                            version="1.0.0",
                            reflectance_dataset= scene['reflect_correct']['reflectance_dataset'],
                            frcov_dataset=l2b_frcov,
                            veg_cover = 0.5,
                            crid = scene['crid'],
                            publish_to_cmr=False,
                            cmr_metadata={},
                            queue="sister-job_worker-32gb",
                            identifier=identifier)

    print(f'Identifier: {identifier}')
    print('Submission status: %s' % vegbiochem_job_response.status)
    print('Job ID: %s' % vegbiochem_job_response.id)
    scene['vegbiochem']  = {'job_id' : vegbiochem_job_response.id}

Identifier: SISTER_AVCL_L2B_VEGBIOCHEM_20130612T010012_001
Submission status: success
Job ID: 585874a3-49ee-4ab7-a644-bb9a78381cf9


## Step 6b. Snow grainsize

In [65]:
for i,scene in enumerate(scenes[2:3]):
    identifier = f'SISTER_{scene["sensor"]}_L2B_GRAINSIZE_{scene["datetime"]}_{scene["crid"]}{meta}'
    
    frcover_id = scene['frcover']['job_id'] 
    frcover_result= [x for x in maap.getJobResult(frcover_id).outputs if x.startswith("s3://s3.") and "FRCOV" in x]
    l2b_frcov =  frcover_result[0]                   
    scene['frcover']['frcover_dataset'] = l2b_frcov

    grainsize_job_response = maap.submitJob(
        algo_id="sister-grainsize",
        version="1.0.0",
        reflectance_dataset=  scene['reflect_correct']['reflectance_dataset'],
        frcov_dataset= scene['frcover']['frcover_dataset'],
        snow_cover = 0.9,
        crid= scene['crid'],
        publish_to_cmr=False,
        cmr_metadata={},
        queue="sister-job_worker-32gb",
        identifier=identifier)
    
    print(f'Identifier: {identifier}')
    print('Submission status: %s' % grainsize_job_response.status)
    print('Job ID: %s' % grainsize_job_response.id)
    scene['grainsize']  = {'job_id' : grainsize_job_response.id}

Identifier: SISTER_AVCL_L2B_GRAINSIZE_20130612T010012_001
Submission status: success
Job ID: 8b1453b2-899e-4ca1-9caa-772bef4fe0b5


In [50]:
identifier = f'SISTER_AVCL_L2A_RSRFL_20130612T010014_001'

l2a_rfl = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_RFL/2013/06/12/SISTER_AVCL_L2A_RFL_20130612T191820_001'
l2a_unc =  's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_RFL/2013/06/12/SISTER_AVCL_L2A_RFL_20130612T191820_001_UNC'

resample_job_response = maap.submitJob(
                                        algo_id="sister-resample",
                                        version="2.0.0",
                                        reflectance_dataset= l2a_rfl,
                                        uncertainty_dataset= l2a_unc,
                                        crid = "001",
                                        publish_to_cmr=False,
                                        cmr_metadata={},
                                        queue="sister-job_worker-16gb",
                                        identifier=identifier)

print(f'Identifier: {identifier}')
print('Submission status: %s' % resample_job_response.status)
print('Job ID: %s' % resample_job_response.id)
scene['resample']  = {'job_id' : resample_job_response.id}

Identifier: SISTER_AVCL_L2A_RSRFL_20130612T010014_001
Submission status: success
Job ID: e0a034c8-354e-4edb-b2d7-349a6f256b76


In [53]:
identifier = f'SISTER_AVCL_L2A_CORFL_20130612T010014_001'
l1b_obs  = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L1B_RDN/2013/06/12/SISTER_AVCL_L1B_RDN_20130612T191820_001_OBS'
l2a_rsrfl = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_RSRFL/2013/06/12/SISTER_AVCL_L2A_RSRFL_20130612T191820_001'
rfl_corr_job_response = maap.submitJob(
                                        algo_id="sister-reflect_correct",
                                        version="2.0.0",
                                        observation_dataset= l1b_obs,
                                        reflectance_dataset= l2a_rsrfl,
                                        crid = '001',
                                        publish_to_cmr=False,
                                        cmr_metadata={},
                                        queue="sister-job_worker-32gb",
                                        identifier=identifier)

In [67]:
identifier = f'SISTER_AVCL_L2B_FRCOVER_20130612T010014_001'
l2a_corfl = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_CORFL/2013/06/12/SISTER_AVCL_L2A_CORFL_20130612T191820_001'

frcover_job_response = maap.submitJob(
                                    algo_id="sister-fractional-cover",
                                    version="1.0.0",
                                    reflectance_dataset=l2a_corfl,
                                    n_cores= 20,
                                    refl_scale= 1,
                                    normalization = 'brightness',
                                    crid = "001",
                                    publish_to_cmr=False,
                                    cmr_metadata={},
                                    queue="sister-job_worker-32gb",
                                    identifier= identifier)




In [71]:
identifier = f'SISTER_AVCL_L2B_VEGBIOCHEM_20130612T010014_001'
l2a_corfl = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_CORFL/2013/06/12/SISTER_AVCL_L2A_CORFL_20130612T191820_001'
l2b_frcov = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2B_FRCOV/2013/06/12/SISTER_AVCL_L2B_FRCOV_20130612T191820_001'

vegbiochem_job_response = maap.submitJob(
                            algo_id="sister-trait_estimate",
                            version="1.0.0",
                            reflectance_dataset= l2a_corfl,
                            frcov_dataset=l2b_frcov,
                            veg_cover = 0.5,
                            crid = "001",
                            publish_to_cmr=False,
                            cmr_metadata={},
                            queue="sister-job_worker-32gb",
                            identifier=identifier)


In [76]:
identifier = f'SISTER_AVCL_L2B_GRAINSIZE_20130612T010014_001'
l2a_corfl = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2A_CORFL/2013/06/12/SISTER_AVCL_L2A_CORFL_20130612T191820_001'
l2b_frcov = 's3://s3.us-west-2.amazonaws.com:80/sister-ops-workspace/LOM/PRODUCTS/AVCL/L2B_FRCOV/2013/06/12/SISTER_AVCL_L2B_FRCOV_20130612T191820_001'

grainsize_job_response = maap.submitJob(
    algo_id="sister-grainsize",
    version="1.0.0",
    reflectance_dataset= l2a_corfl,
    frcov_dataset= l2b_frcov,
    snow_cover = 0.9,
    crid= "001",
    publish_to_cmr=False,
    cmr_metadata={},
    queue="sister-job_worker-16gb",
    identifier=identifier)

print(grainsize_job_response)

{'job_id': 'f6093543-da14-41a7-b4f5-7bd8c5238176', 'status': 'success', 'machine_type': None, 'architecture': None, 'machine_memory_size': None, 'directory_size': None, 'operating_system': None, 'job_start_time': None, 'job_end_time': None, 'job_duration_seconds': None, 'cpu_usage': None, 'cache_usage': None, 'mem_usage': None, 'max_mem_usage': None, 'swap_usage': None, 'read_io_stats': None, 'write_io_stats': None, 'sync_io_stats': None, 'async_io_stats': None, 'total_io_stats': None, 'error_details': None, 'response_code': 200, 'outputs': []}


In [18]:
preprocess_job_response

{'job_id': '', 'status': 'failed', 'machine_type': None, 'architecture': None, 'machine_memory_size': None, 'directory_size': None, 'operating_system': None, 'job_start_time': None, 'job_end_time': None, 'job_duration_seconds': None, 'cpu_usage': None, 'cache_usage': None, 'mem_usage': None, 'max_mem_usage': None, 'swap_usage': None, 'read_io_stats': None, 'write_io_stats': None, 'sync_io_stats': None, 'async_io_stats': None, 'total_io_stats': None, 'error_details': None, 'response_code': 502, 'outputs': []}