In [21]:
from maap.maap import MAAP
# MAAP API client bound to the ops host; used further down to submit DPS jobs
# (maap.submitJob) and to query them (maap.getJobStatus / maap.getJobResult).
maap = MAAP(maap_host='api.ops.maap-project.org')

# Launch DPS for tile_atl08.py

In [23]:
import os
import geopandas
import pandas as pd
import glob
import datetime
!pip install xmltodict
import xmltodict

[0m

In [24]:
def get_stack_fn(stack_list_fn, in_tile_num):
    """Return the first stack path listed for a tile, or None if the tile has no stack.

    Parameters
    ----------
    stack_list_fn : str
        Path of a csv (e.g. a *tindex_master.csv) with at least the columns
        'location' and 'path'.
    in_tile_num : int or str
        Tile number to look for in the 'location' column.

    Returns
    -------
    The 'path' value of the first matching row, or None when no row matches.

    Notes
    -----
    Every matching path is printed. The first match in file order is returned;
    no sorting by date is done here.
    """
    import re

    all_stacks_df = pd.read_csv(stack_list_fn)

    # Match "_<tile>" only when it is not followed by another digit, so that
    # tile 34 does not also select "_3457". NaN locations count as no match.
    tile_pattern = "_" + re.escape(str(in_tile_num)) + r"(?!\d)"
    is_tile = all_stacks_df['location'].str.contains(tile_pattern, regex=True, na=False)
    tile_paths = all_stacks_df.loc[is_tile, 'path'].to_list()

    for tile_path in tile_paths:
        print(tile_path)

    # Check for "no match" before indexing; indexing first raised IndexError.
    if not tile_paths:
        return None
    return tile_paths[0]

# nmt added: code that returns df of landsat locations and tile number
# This is basically CountOutput.py
def get_stack_df(dps_dir, TYPE, dps_year):
    """Walk a DPS output tree and list the output files of one product type.

    Parameters
    ----------
    dps_dir : any
        Not used by this function; kept so existing callers keep working.
    TYPE : str
        Must contain "Landsat", "Topo" or "ATL08". If it contains more than
        one of them, the last one in that order decides the root and suffix.
    dps_year : int or str
        Year sub-directory below .../ops/ that is walked.

    Returns
    -------
    pandas.DataFrame
        Columns 'location' (full file path) and 'tile_num' (second
        '_'-separated token of the file name, as a string). For "ATL08" the
        'tile_num' column is left empty (NaN).

    Raises
    ------
    ValueError
        If TYPE contains none of the known product names.
    """
    # (keyword looked for in TYPE, DPS algorithm directory, file-name suffix)
    product_kinds = (
        ("Landsat", "do_landsat_stack_3-1-2_ubuntu", "_dps.tif"),
        ("Topo", "do_topo_stack_3-1-5_ubuntu", "_stack.tif"),
        ("ATL08", "run_extract_ubuntu", "0m.csv"),
    )

    root = None
    ends_with_str = None
    for keyword, algo_dir, suffix in product_kinds:
        if keyword in TYPE:
            root = f"/projects/my-private-bucket/dps_output/{algo_dir}/ops/{dps_year}/"
            ends_with_str = suffix

    if root is None:
        raise ValueError(f"TYPE must contain 'Landsat', 'Topo' or 'ATL08'; got {TYPE!r}")

    is_atl08 = "ATL08" in TYPE

    # Collect plain dicts and build the frame once: DataFrame.append no longer
    # exists in pandas >= 2.0 and copied the whole frame on every call.
    records = []
    for dirpath, _subdirs, files in os.walk(root):
        for fname in files:
            if not fname.endswith(ends_with_str):
                continue
            record = {'location': os.path.join(dirpath + "/", fname)}
            if not is_atl08:
                record['tile_num'] = fname.split('_')[1]
            records.append(record)

    return pd.DataFrame(records, columns=['location', 'tile_num'])

#### Set the names of the data frames to create

In [25]:
# All tile-list csvs used here live in one folder of the public bucket
tile_lists_dir = "/projects/my-public-bucket/DPS_tile_lists"

# Master tile indexes for the Topo and Landsat stacks (from build_tindex_master.py)
topo_tindex = f"{tile_lists_dir}/Topo_tindex_master.csv"
landsat_tindex = f"{tile_lists_dir}/Landsat_tindex_master.csv"

# Model-ready subset: tiles that have both a Topo and a Landsat stack
model_ready_tiles_topo = f"{tile_lists_dir}/model_ready_tiles_topo_paths.csv"
model_ready_tiles_landsat = f"{tile_lists_dir}/model_ready_tiles_landsat_paths.csv"

## Make the data frames from build_tindex_master.py csvs for Topo and Landsat tiles
`python lib/build_tindex_master.py`

In [26]:
# Year sub-directory that get_stack_df walks when the csvs have to be rebuilt.
# NOTE(review): 2021 matches the Topo path shown further down (.../ops/2021/...);
# confirm it is the year you want before relying on the rebuild branch.
STACK_DPS_YEAR = 2021

if os.path.isfile(landsat_tindex) and os.path.isfile(topo_tindex):
    # Fast path: both master indexes already exist on disk
    print('Reading existing...')
    ls8_df = pd.read_csv(landsat_tindex)
    topo_df = pd.read_csv(topo_tindex)
else:
    s3_stem = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas'
    local_stem = '/projects/my-private-bucket'

    ls8_root =  s3_stem + '/dps_output/do_landsat_stack_3-1-2_ubuntu'
    topo_root = s3_stem + '/dps_output/do_topo_stack_3-1-5_ubuntu'

    # get_stack_df takes three arguments (dps_dir, TYPE, dps_year); calling it
    # with two raised TypeError. Its 'location' column is renamed to
    # 'local_path' so both branches give the column the cells below read.
    ls8_df = get_stack_df(ls8_root, "Landsat", STACK_DPS_YEAR).rename(columns={'location': 'local_path'})
    topo_df = get_stack_df(topo_root, "Topo", STACK_DPS_YEAR).rename(columns={'location': 'local_path'})
topo_df.head()

Reading existing...


Unnamed: 0.1,Unnamed: 0,local_path,tile_num
0,0,/projects/my-private-bucket/dps_output/do_topo...,421
1,1,/projects/my-private-bucket/dps_output/do_topo...,455
2,2,/projects/my-private-bucket/dps_output/do_topo...,456
3,3,/projects/my-private-bucket/dps_output/do_topo...,491
4,4,/projects/my-private-bucket/dps_output/do_topo...,492


In [27]:

# Show the S3 console URL that corresponds to the local Topo stack path of tile 3457
topo_df = pd.read_csv(topo_tindex)
tile_3457_paths = topo_df[topo_df.tile_num == 3457].local_path.tolist()
tile_3457_paths[0].replace('/projects/my-private-bucket', 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas')


'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas/dps_output/do_topo_stack_3-1-5_ubuntu/ops/2021/07/23/23/32/27/934649/Copernicus_3457_covars_cog_topo_stack.tif'

## Get tile ids for which both Topo and Landsat stacks exist

In [28]:
# added by nmt: get filenames of co-incident landsat and topo
# Switched off by default; set the condition to True to rebuild the two
# model-ready csvs from ls8_df and topo_df.
if False:
    # An inner join on tile_num yields one row per Landsat/Topo pair that
    # shares a tile id, which is what the nested row-by-row comparison built.
    # It also avoids DataFrame.append, which no longer exists in pandas >= 2.0.
    coincident_df = ls8_df[['local_path', 'tile_num']].merge(
        topo_df[['local_path', 'tile_num']],
        on='tile_num',
        how='inner',
        suffixes=('_ls8', '_topo'),
    )
    coincident_df['tile_num'] = coincident_df['tile_num'].astype(int)

    # Only need to choose one, but we'll do 2 and then check
    ls8_sub_df = (
        coincident_df[['local_path_ls8', 'tile_num']]
        .rename(columns={'local_path_ls8': 'local_path'})
    )
    topo_sub_df = (
        coincident_df[['local_path_topo', 'tile_num']]
        .rename(columns={'local_path_topo': 'local_path'})
    )

    print(ls8_sub_df.head())
    print(topo_sub_df.head())
    print(len(ls8_sub_df),len(topo_sub_df))

    topo_sub_df.to_csv( model_ready_tiles_topo, index=False, encoding='utf-8-sig')
    ls8_sub_df.to_csv( model_ready_tiles_landsat, index=False, encoding='utf-8-sig')

#### Now you have a set of tile ids for which both Landsat and Topo stacks exist

In [29]:
# Tile numbers of the model-ready Topo stacks, as a list of Python ints
topo_sub_df = pd.read_csv("/projects/my-public-bucket/DPS_tile_lists/model_ready_tiles_topo_paths.csv")
model_ready_tile_nums = topo_sub_df['tile_num'].to_numpy().astype(int)
INPUT_TILE_NUM_LIST = model_ready_tile_nums.tolist()
len(INPUT_TILE_NUM_LIST)

4465

##### Test: get a subset of tile ids for test tiles (Norway and others in NA)

In [101]:
DO_EXPERIMENT = True

# Norway tiles: the 'layer' column of the shared csv
NORWAY_TILE_LIST = pd.read_csv('/projects/shared-buckets/lduncanson/misc_files/norway_tiles.csv').layer.tolist()

# Named groups of individual tile ids (not part of the final list built below)
DELTA_TILE_LIST = [3365, 3366, 3367, 3458, 3459, 3460, 3353, 3354, 3355, 3361, 3362]
BONA_TILE_LIST = [3270, 3271, 3272, 3456, 3457, 3363, 3364, 3365, 3268, 3269]
HEALY_TILE_LIST = [3551, 3552, 3553, 3645, 3646, 3647, 3648, 3649, 3555, 3554]

# Alaska block: five runs of consecutive tile ids, given as (first, last) inclusive
ALASKA_TILE_LIST = [
    tile_num
    for first, last in [(3268, 3272), (3361, 3366), (3454, 3459), (3549, 3555), (3643, 3648)]
    for tile_num in range(first, last + 1)
]

# Experiment set = Norway tiles followed by the Alaska block
INPUT_EXPERIMENT_TILE_NUM_LIST = NORWAY_TILE_LIST + ALASKA_TILE_LIST
len(INPUT_EXPERIMENT_TILE_NUM_LIST)

150

#### Read in the latest tindex and compare with a previous set of completed tiles to see which ones still need to be run

In [31]:
import numpy as np

# Tiles already present in the tindex of the "no LC height thresholds" run
tiles_completed = pd.read_csv('/projects/my-private-bucket/dps_output/run_tile_atl08_ubuntu/master/2022/run_no_LC_height_thresholds/ATL08_filt_tindex_master.csv')
print(f'Tiles completed: {len(tiles_completed)}')

# Start from INPUT_EXPERIMENT_TILE_NUM_LIST (defined in the test-tiles cell above).
# Reading INPUT_TEST_TILE_NUM_LIST here, before it is assigned, fails on a fresh
# kernel and shrinks the list a little more on every re-run of this cell.
tile_nums_missing = np.setdiff1d(INPUT_EXPERIMENT_TILE_NUM_LIST, tiles_completed.tile_num)
print(f'Tiles missing: {len(tile_nums_missing)}')

# Experiment tiles that still have to be run
INPUT_TEST_TILE_NUM_LIST = tile_nums_missing.tolist()
len(INPUT_TEST_TILE_NUM_LIST)

Tiles completed: 128
Tiles missing: 16


16

In [230]:
# Read the ATL08 master tile index from the shared bucket and report its row count
tindex_master_fn = '/projects/shared-buckets/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv'
tiles = pd.read_csv(tindex_master_fn)
len(tiles)

46166

In [129]:
# Tile indexes of the two runs being compared
tiles_completed_no_LC = pd.read_csv('/projects/my-private-bucket/dps_output/run_tile_atl08_ubuntu/master/2022/run_no_LC_height_thresholds/ATL08_filt_tindex_master.csv')
tiles_completed_LC = pd.read_csv('/projects/my-private-bucket/dps_output/run_tile_atl08_ubuntu/master/2022/run_LC_height_thresholds/ATL08_filt_tindex_master.csv')
print(f"# tiles for no LC:\t{len(tiles_completed_no_LC)}")
print(f"# tiles for LC:\t\t{len(tiles_completed_LC)}")

# Convert once to sets and reuse them; the earlier np.setdiff1d results were
# overwritten straight away and never used.
requested_tile_nums = set(INPUT_TEST_TILE_NUM_LIST)
completed_no_LC = set(tiles_completed_no_LC.tile_num)
completed_LC = set(tiles_completed_LC.tile_num)

# Requested tiles that are absent from each run
tile_nums_missing_no_LC = requested_tile_nums - completed_no_LC
tile_nums_missing_LC = requested_tile_nums - completed_LC
print(f"tiles missing for no LC:\t{tile_nums_missing_no_LC}")
print(f"tiles missing for LC:\t\t{tile_nums_missing_LC}")

# The missing tiles common to both runs probably won't process b/c they have no ATL08 over land, or no corresponding Landsat or Topo tiles.
# Those missing that are different in each set need to be run:
# a tile finished in one run but not in the other is still needed for the other.
DPS_INPUT_TILE_NUM_LIST_no_LC = list(completed_LC - completed_no_LC)
DPS_INPUT_TILE_NUM_LIST_LC = list(completed_no_LC - completed_LC)
print(f"Tiles still needed for no LC run: {DPS_INPUT_TILE_NUM_LIST_no_LC}")
print(f"Tiles still needed for LC run: {DPS_INPUT_TILE_NUM_LIST_LC}")


# tiles for no LC:	132
# tiles for LC:		133
tiles missing for no LC:	{5, 6, 7, 328, 26025, 301, 14, 26574, 178, 275, 21, 3549}
tiles missing for LC:		{5, 6, 7, 328, 357, 10, 26025, 301, 14, 26574, 178, 275, 21}
Tiles still needed for no LC run: [3549]
Tiles still needed for LC run: [10, 357]


In [202]:
# Run sub-directory name; passed to build_tindex_master.py as -m and also used
# as the last component of the output directory below.
month_dir_str = 'run_LC_height_thresholds'
# Directory given to the script as -o; a later cell reads
# ATL08_filt_tindex_master.csv from this directory.
index_out_dir = os.path.join('/projects/my-private-bucket/dps_output/run_tile_atl08_ubuntu/master/2022', month_dir_str)
# Shell call: IPython substitutes $month_dir_str and $index_out_dir with the Python variables above.
!python /projects/icesat2_boreal/lib/build_tindex_master.py -t ATL08_filt -y 2022 -m $month_dir_str -o $index_out_dir

  shapely_geos_version, geos_capi_version_string

Building a list of tiles:  ATL08_filt

Output dir:  /projects/my-private-bucket/dps_output/run_tile_atl08_ubuntu/master/2022/run_LC_height_thresholds
                                              s3_path  ...                                               file
0   s3://maap-ops-workspace/lduncanson/dps_output/...  ...  atl08_005_30m_filt_topo_landsat_20220312_0043.csv
2   s3://maap-ops-workspace/lduncanson/dps_output/...  ...  atl08_005_30m_filt_topo_landsat_20220312_0054.csv
5   s3://maap-ops-workspace/lduncanson/dps_output/...  ...  atl08_005_30m_filt_topo_landsat_20220312_0065.csv
7   s3://maap-ops-workspace/lduncanson/dps_output/...  ...  atl08_005_30m_filt_topo_landsat_20220312_0030.csv
10  s3://maap-ops-workspace/lduncanson/dps_output/...  ...  atl08_005_30m_filt_topo_landsat_20220312_0042.csv

[5 rows x 3 columns]
# of duplicate tiles: 23
Final # of tiles: 133
df shape :                                               s3_path  ... t

In [33]:
TEST_DPS  = True

if not TEST_DPS:
    # Full run: every model-ready tile
    DPS_INPUT_TILE_NUM_LIST = INPUT_TILE_NUM_LIST
else:
    # Test run: keep only the test tiles that are not yet in the tindex
    # found in index_out_dir
    t = pd.read_csv(os.path.join(index_out_dir,'ATL08_filt_tindex_master.csv'))
    COMPLETED_TILES = t.tile_num.to_list()
    NEED_TILES = list(set(INPUT_TEST_TILE_NUM_LIST) - set(COMPLETED_TILES))

    print(NEED_TILES)
    DPS_INPUT_TILE_NUM_LIST = NEED_TILES

print(f"# of tiles to run: {len(DPS_INPUT_TILE_NUM_LIST)}\n", DPS_INPUT_TILE_NUM_LIST)


[3552, 4, 5, 6, 7, 328, 9, 26025, 301, 14, 26574, 177, 178, 275, 21, 3549]
# of tiles to run: 16
 [3552, 4, 5, 6, 7, 328, 9, 26025, 301, 14, 26574, 177, 178, 275, 21, 3549]


#### Customize the DPS run: set up the parameters dictionary

In [188]:
# Parameters shared by both Norway test runs; the two runs differ only in 'LC_filter'.
# 'in_tile_num' is left empty here and set per tile when jobs are submitted.
_norway_shared_params = {
    'in_tile_num': '',
    'in_tile_fn': 'https://maap-ops-workspace.s3.amazonaws.com/shared/nathanmthomas/boreal_tiles_v003.gpkg',
    'in_tile_layer': 'boreal_tiles_v003',
    'csv_list_fn': 's3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv',
    'topo_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv',
    'landsat_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv',
    'years_list': '2019 2020 2021',
    'user_stacks': 'nathanmthomas',
    'user_atl08': 'lduncanson',
    'thresh_sol_el': 5,
    'v_ATL08': 5,
    'minmonth': 4,
    'maxmonth': 10,
}

# Norway test 01
# Just include sol_el so we can use sol_el < 5
in_param_dict_norway01 = {**_norway_shared_params, 'LC_filter': False}

# Norway test 02
# Use v005 ATL08, which will apply lc-based thresholds, extend to all months
# NOTE!! make sure you manually update to use the correct filter in tile_atl08.py
in_param_dict_norway02 = {**_norway_shared_params, 'LC_filter': True}

In [189]:
# Work on a copy: the submission loop writes 'in_tile_num' into in_param_dict
# for every tile, and that should not alter the Norway 02 template itself.
in_param_dict = dict(in_param_dict_norway02)
in_param_dict

{'in_tile_num': '',
 'in_tile_fn': 'https://maap-ops-workspace.s3.amazonaws.com/shared/nathanmthomas/boreal_tiles_v003.gpkg',
 'in_tile_layer': 'boreal_tiles_v003',
 'csv_list_fn': 's3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv',
 'topo_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv',
 'landsat_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv',
 'years_list': '2019 2020 2021',
 'user_stacks': 'nathanmthomas',
 'user_atl08': 'lduncanson',
 'thresh_sol_el': 5,
 'v_ATL08': 5,
 'minmonth': 4,
 'maxmonth': 10,
 'LC_filter': True}

## Run a DPS job across the list

In [190]:
%%time
submit_results_df_list = []
len_input_list = len(DPS_INPUT_TILE_NUM_LIST)
print(f"# of input tiles for DPS: {len_input_list}")

# Job settings are identical for every tile, so they are set once before the
# loop. This also keeps IDENTIFIER defined when the tile list is empty.
IDENTIFIER = 'run_tile_atl08'
ALGO_ID = f'{IDENTIFIER}_ubuntu'
USER = 'lduncanson'
WORKER_TYPE = 'maap-dps-worker-16gb'

# Submission counts at which a progress line is printed (always includes the last one)
PROGRESS_DPS_NUMS = {1, 25, 50, 100, 500, 1000, 1500, 2000, 3000, 5000, 7000, 9000,
                     11000, 13000, 15000, 17000, 19000, 21000, 24000, len_input_list}

for DPS_num, INPUT_TILE_NUM in enumerate(DPS_INPUT_TILE_NUM_LIST, start=1):

    # The tile number is the only parameter that changes between jobs
    in_param_dict['in_tile_num'] = INPUT_TILE_NUM

    submit_result = maap.submitJob(
            identifier=IDENTIFIER,
            algo_id=ALGO_ID,
            version='master',
            username=USER, # username needs to be the same as whoever created the workspace
            queue=WORKER_TYPE,
            **in_param_dict
        )

    # Build a record of submission details; one timestamp is taken so that
    # 'submit_time' and 'dbs_job_hour' always agree.
    submit_time = datetime.datetime.now()
    submit_result['dps_num'] = DPS_num
    submit_result['tile_num'] = INPUT_TILE_NUM
    submit_result['submit_time'] = submit_time
    submit_result['dbs_job_hour'] = submit_time.hour
    submit_result['algo_id'] = ALGO_ID
    submit_result['user'] = USER
    submit_result['worker_type'] = WORKER_TYPE

    # Append to a list of data frames of submission results
    submit_results_df_list.append(pd.DataFrame([submit_result]))

    if DPS_num in PROGRESS_DPS_NUMS:
        print(f"DPS run #: {DPS_num}\t| tile num: {INPUT_TILE_NUM}\t| submit status: {submit_result['status']}\t| job id: {submit_result['job_id']}")

if submit_results_df_list:
    # Build a final submission results df and save
    submit_results_df = pd.concat(submit_results_df_list)
    nowtime = pd.Timestamp.now().strftime('%Y%m%d%H%M')
    print(f"Current time:\t{nowtime}")
    submit_results_df.to_csv(f'/projects/my-public-bucket/DPS_{IDENTIFIER}_submission_results_{len_input_list}_{nowtime}.csv')
else:
    # pd.concat raises on an empty list; with nothing submitted there is nothing to save
    submit_results_df = pd.DataFrame()
    print("No tiles were submitted; no submission results file written.")
submit_results_df

# of input tiles for DPS: 2
DPS run #: 1	| tile num: 10	| submit status: success	| job id: 846a76f0-408e-47f3-9793-a95e22517c08
DPS run #: 2	| tile num: 357	| submit status: success	| job id: 32dba8a0-251d-4056-ac8f-80a4f9bf78d0
Current time:	202203171706
CPU times: user 23.8 ms, sys: 6.72 ms, total: 30.5 ms
Wall time: 433 ms


Unnamed: 0,status,http_status_code,job_id,dps_num,tile_num,submit_time,dbs_job_hour,algo_id,user,worker_type
0,success,200,846a76f0-408e-47f3-9793-a95e22517c08,1,10,2022-03-17 17:06:57.631194,17,run_tile_atl08_ubuntu,lduncanson,maap-dps-worker-16gb
0,success,200,32dba8a0-251d-4056-ac8f-80a4f9bf78d0,2,357,2022-03-17 17:06:57.716327,17,run_tile_atl08_ubuntu,lduncanson,maap-dps-worker-16gb


After almost any DPS job, you have to assess what succeeded and what failed. This involves:
1. building a table of job statuses from the job ids captured in `submit_results_df` by the DPS submission cell (this takes 40 mins for ~47k jobs) --> this tells you how many jobs failed
2. merging the job status table with `submit_results_df` --> this tells you which specific granules (or tile nums) failed
3. building another input list of granules for a follow-up DPS

## Assess DPS results
Build a table of job status based on job id - how many jobs failed?

In [201]:
%%time
def BUILD_TABLE_JOBSTATUS(submit_results_df):
    """Fetch and parse the status of every submitted job into one table.

    For each id in submit_results_df.job_id, the response content of
    maap.getJobStatus is parsed with xmltodict, wrapped in a DataFrame and
    transposed; the per-job frames are then stacked with pd.concat.
    """
    import xmltodict

    status_frames = []
    for job_id in submit_results_df.job_id.to_list():
        status_doc = xmltodict.parse(maap.getJobStatus(job_id).content)
        status_frames.append(pd.DataFrame(status_doc).transpose())

    return pd.concat(status_frames)

job_status_df = BUILD_TABLE_JOBSTATUS(submit_results_df)
#print(job_status_df.head())

num_jobs = submit_results_df.shape[0]
# Attach each job's status row to its submission record
z = submit_results_df.merge(job_status_df, how='left', left_on='job_id',  right_on='wps:JobID')

# Count jobs per status once instead of filtering the frame for every line
status_counts = z['wps:Status'].value_counts()
num_pending = int(status_counts.get('Accepted', 0))
num_running = int(status_counts.get('Running', 0))
num_succeeded = int(status_counts.get('Succeeded', 0))
num_failed = int(status_counts.get('Failed', 0))

# The failure rate is relative to finished jobs only. Right after submission
# no job has finished yet, so guard the division and report nan in that case.
num_finished = num_failed + num_succeeded
pct_failed = round(100 * num_failed / num_finished, 2) if num_finished else float('nan')

print(f'Count total jobs:\t{num_jobs}')
print(f"Count pending jobs:\t{num_pending}")
print(f"Count running jobs:\t{num_running}")
print(f"Count succeeded jobs:\t{num_succeeded}")
print(f"Count failed jobs:\t{num_failed}")
print(f"% of failed jobs:\t{pct_failed}")

Count total jobs:	2
Count pending jobs:	0
Count running jobs:	0
Count succeeded jobs:	2
Count failed jobs:	0
% of failed jobs:	0.0
CPU times: user 32.9 ms, sys: 0 ns, total: 32.9 ms
Wall time: 103 ms


In [90]:
# Parsed result document of the first job whose status is 'Succeeded'
succeeded_jobs = z[z['wps:Status'] =='Succeeded']
xmltodict.parse(maap.getJobResult(succeeded_jobs.iloc[0].job_id).content)

OrderedDict([('wps:Result',
              OrderedDict([('@xmlns:ows', 'http://www.opengis.net/ows/2.0'),
                           ('@xmlns:schemaLocation',
                            'http://schemas.opengis.net/wps/2.0/wps.xsd'),
                           ('@xmlns:wps', 'http://www.opengis.net/wps/2.0'),
                           ('@xmlns:xsi',
                            'http://www.w3.org/2001/XMLSchema-instance'),
                           ('wps:JobID',
                            '7523cb62-fc0f-44e9-b54e-b35f6e54fba8'),
                           ('wps:Output',
                            OrderedDict([('@id',
                                          'output-2022-03-17T02:53:00.066869'),
                                         ('wps:Data',
                                          ['http://maap-ops-workspace.s3-website-us-west-2.amazonaws.com/lduncanson/dps_output/run_tile_atl08_ubuntu/master/2022/03/17/02/53/00/066869',
                                           's3://s3.us

In [89]:
# Parsed result document of the second job (position 1) whose status is 'Failed'
failed_jobs = z[z['wps:Status'] =='Failed']
xmltodict.parse(maap.getJobResult(failed_jobs.iloc[1].job_id).content)

OrderedDict([('wps:Result',
              OrderedDict([('@xmlns:ows', 'http://www.opengis.net/ows/2.0'),
                           ('@xmlns:schemaLocation',
                            'http://schemas.opengis.net/wps/2.0/wps.xsd'),
                           ('@xmlns:wps', 'http://www.opengis.net/wps/2.0'),
                           ('@xmlns:xsi',
                            'http://www.w3.org/2001/XMLSchema-instance'),
                           ('wps:JobID',
                            'f7409afd-53df-4423-8300-97a9eda7430e'),
                           ('wps:Output',
                            OrderedDict([('@id', 'traceback'),
                                         ('wps:Data',
                                          'activate does not accept more than one argument:\n[\'/app/icesat2_boreal/dps/alg_2-4/run_tile_atl08.sh\', \'328\', \'boreal_tiles_v003\', \'s3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv\', \'s3://maap-ops-workspace/shared/natha

In [75]:
# Shell call to build_tindex_master_v2.py for ATL08_filt (-y 2022, -m 3), writing into /projects/test_dps
!python /projects/icesat2_boreal/lib/build_tindex_master_v2.py -t ATL08_filt -y 2022 -m 3 -o /projects/test_dps

  shapely_geos_version, geos_capi_version_string

Building a list of tiles:  ATL08_filt

Output dir:  /projects/test_dps
  df['tile_num'] = df['file'].str.split('_', expand=True)[7].str.replace('.csv','')
# of duplicate tiles: 4
Final # of tiles: 103
df shape :                                              s3_path  ... tile_num
0  s3://maap-ops-workspace/lduncanson/dps_output/...  ...     0054
2  s3://maap-ops-workspace/lduncanson/dps_output/...  ...     0043
4  s3://maap-ops-workspace/lduncanson/dps_output/...  ...     0031
6  s3://maap-ops-workspace/lduncanson/dps_output/...  ...     0042
8  s3://maap-ops-workspace/lduncanson/dps_output/...  ...     0052

[5 rows x 4 columns]
Writing tindex master csv: /projects/test_dps/ATL08_filt_tindex_master.csv


[387, 388, 5, 6, 7, 10, 14, 16, 275, 21, 25, 27, 28, 29, 416, 37, 38, 296, 26025, 299, 300, 301, 48, 177, 50, 178, 60, 198, 3270, 72, 326, 327, 328, 26574, 354, 355, 356, 357, 247]


In [153]:
%%time
TILE_NUM = 131 #NEED_TILES[6]

# Command-line options for tile_atl08.py, one entry per option (or option group);
# the entries are joined with single spaces into one argument string.
tile_atl08_options = [
    "-LC_filter True",
    "--extract_covars",
    "--do_30m",
    "--do_dps",
    "-years_list 2019 2020 2021",
    "-o /projects/my-public-bucket/atl08_filt_covar_tiles",
    f"-in_tile_num {TILE_NUM}",
    "-in_tile_fn /projects/shared-buckets/nathanmthomas/boreal_tiles_v003.gpkg",
    "-in_tile_layer boreal_tiles_v003",
    "-in_tile_id_col tile_num",
    "-csv_list_fn /projects/shared-buckets/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv",
    "-topo_stack_list_fn /projects/shared-buckets/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv",
    "-landsat_stack_list_fn /projects/shared-buckets/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv",
    "-user_stacks nathanmthomas",
    "-user_atl08 lduncanson",
    "-thresh_sol_el 5",
    "-v_ATL08 5 -minmonth 4 -maxmonth 10",
]
args = " ".join(tile_atl08_options)
print(args)
# Shell call to tile_atl08.py; IPython substitutes $args with the argument string built above
!python /projects/icesat2_boreal/lib/tile_atl08.py $args

-LC_filter True --extract_covars --do_30m --do_dps -years_list 2019 2020 2021 -o /projects/my-public-bucket/atl08_filt_covar_tiles -in_tile_num 131 -in_tile_fn /projects/shared-buckets/nathanmthomas/boreal_tiles_v003.gpkg -in_tile_layer boreal_tiles_v003 -in_tile_id_col tile_num -csv_list_fn /projects/shared-buckets/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv -topo_stack_list_fn /projects/shared-buckets/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv -landsat_stack_list_fn /projects/shared-buckets/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv -user_stacks nathanmthomas -user_atl08 lduncanson -thresh_sol_el 5 -v_ATL08 5 -minmonth 4 -maxmonth 10
  shapely_geos_version, geos_capi_version_string

Land cover filtering set to: True

Working on tile:	 131
From layer:		 boreal_tiles_v003
In vector file:		 /projects/shared-buckets/nathanmthomas/boreal_tiles_v003.gpkg
ATL08 version:		 5
Season start:		 04-01
Season end:		 10-31
Years:			 [2019, 2020, 2021]
ATL08 bin length:	 3