In [1]:
from maap.maap import MAAP
maap = MAAP(maap_host='api.ops.maap-project.org')

# Launch DPS for mapBoreal.R

In [2]:
import os
import geopandas
import pandas as pd
import json
from shutil import copy

def local_to_s3(url, user='lduncanson'):
    ''' A Function to convert local paths to s3 urls'''
    return url.replace('/projects/my-private-bucket', f's3://maap-ops-workspace/{user}')
def local_to_https(url, user='lduncanson'):
    ''' A Function to convert local paths to https urls'''
    return url.replace('/projects/my-private-bucket', f'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/{user}')
def local_to_https_uswest2(url, user='lduncanson'):
    ''' A Function to convert local paths to https us-west-s urls'''
    return url.replace('/projects/my-private-bucket', f'https://maap-ops-workspace.s3.us-west-2.amazonaws.com/{user}')

  shapely_geos_version, geos_capi_version_string


In [3]:
atl08_filt_tindex_master =   pd.read_csv('s3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_filt_tindex_master.csv')
topo_tindex_master =         pd.read_csv('s3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv')
landsat_tindex_master =      pd.read_csv('s3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv')

# Convert al local_paths to s3 and https
atl08_filt_tindex_master['s3'] = [local_to_s3(local_path, user='lduncanson') for local_path in atl08_filt_tindex_master['local_path']]
#.. for data produced by 'lduncanson' workspace
atl08_filt_tindex_master['https'] = [local_to_https_uswest2(local_path, user='lduncanson') for local_path in atl08_filt_tindex_master['local_path']]

#.. for data produced by 'nathanmthomas' workspace
for tindex_master in [topo_tindex_master, landsat_tindex_master]:
    tindex_master['s3'] = [local_to_s3(local_path, user='nathanmthomas') for local_path in tindex_master['local_path']]
    tindex_master['https'] = [local_to_https_uswest2(local_path, user='nathanmthomas') for local_path in tindex_master['local_path']]

# Use the ATL08 filtered tindex master list to tell you which tiles you'll run

In [4]:
INPUT_TILE_NUM_LIST = atl08_filt_tindex_master['tile_num'].values.astype(int).tolist()
print(len(INPUT_TILE_NUM_LIST))

# Remove duplicate tile nums
INPUT_TILE_NUM_LIST = list(set(INPUT_TILE_NUM_LIST))

print(len(INPUT_TILE_NUM_LIST))

3972
3937


In [23]:
# Check s3 with a tile_num
in_atl08_s3 =  atl08_filt_tindex_master['s3'].loc[atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
in_topo_s3 = topo_tindex_master['s3'].loc[topo_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
in_landsat_s3 = landsat_tindex_master['s3'].loc[landsat_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
print(in_atl08_s3)
print(in_topo_s3)
print(in_landsat_s3)
# Check retrieval of http s3 path with a tle_num
in_atl08_https =  atl08_filt_tindex_master['https'].loc[atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
in_topo_https = topo_tindex_master['https'].loc[topo_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
in_landsat_https = landsat_tindex_master['https'].loc[landsat_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0]].tolist()[0]
print(in_atl08_https)
print(in_topo_https)
print(in_landsat_https)

s3://maap-ops-workspace/lduncanson/dps_output/run_tile_atl08_ubuntu/master/2021/09/25/08/37/36/740304/atl08_004_30m_filt_topo_landsat_20210925_0009.csv
s3://maap-ops-workspace/nathanmthomas/dps_output/do_topo_stack_3-1-5_ubuntu/ops/2021/09/15/18/10/34/658640/Copernicus_9_covars_cog_topo_stack.tif
s3://maap-ops-workspace/nathanmthomas/dps_output/do_landsat_stack_3-1-2_ubuntu/ops/2021/09/14/19/20/02/503587/Landsat8_9_comp_cog_2015-2020_dps.tif
https://maap-ops-workspace.s3.us-west-2.amazonaws.com/lduncanson/dps_output/run_tile_atl08_ubuntu/master/2021/09/25/08/37/36/740304/atl08_004_30m_filt_topo_landsat_20210925_0009.csv
https://maap-ops-workspace.s3.us-west-2.amazonaws.com/nathanmthomas/dps_output/do_topo_stack_3-1-5_ubuntu/ops/2021/09/15/18/10/34/658640/Copernicus_9_covars_cog_topo_stack.tif
https://maap-ops-workspace.s3.us-west-2.amazonaws.com/nathanmthomas/dps_output/do_landsat_stack_3-1-2_ubuntu/ops/2021/09/14/19/20/02/503587/Landsat8_9_comp_cog_2015-2020_dps.tif


## Get file for boreal biomass models


In [17]:
bio_models_https = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/lduncanson/my-private-bucket/bio_models.tar'

## Run a DPS job

In [16]:
RUN_DPS  = True

if RUN_DPS:
    ##################################
    #Test DPS submission on a single file
    for i, INPUT_TILE_NUM in enumerate(INPUT_TILE_NUM_LIST):
        DPS_num = i+1
        
        # Get the s3 paths of the corresponding input filenames with an input tile_num
        in_atl08_https =  atl08_filt_tindex_master['https'].loc[atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
        in_topo_https = topo_tindex_master['https'].loc[topo_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
        in_landsat_https = landsat_tindex_master['https'].loc[landsat_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
        
        #print(in_atl08_https) 
        #print(in_topo_https)
        #print(in_landsat_https)
        
        if True:
            in_param_dict = {
                                    'in_atl08_fn': f"input/{os.path.basename(in_atl08_https)}",
                                    'in_topo_fn': f"input/{os.path.basename(in_topo_https)}",
                                    'in_landsat_fn': f"input/{os.path.basename(in_landsat_https)}",
                                    'in_atl08_fn_url': in_atl08_https,
                                    'in_topo_fn_url': in_topo_https,
                                    'in_landsat_fn_url': in_landsat_https
                }

            submit_result = maap.submitJob(
                    identifier='run_boreal_biomass',
                    algo_id='run_boreal_biomass_ubuntu',
                    version='master',
                    username='lduncanson', # username needs to be the same as whoever created the workspace
                    queue='maap-dps-worker-32gb',
                    **in_param_dict
                )

            #submit_result = 'submit test'
            if DPS_num in [1, 10, 50, 100,200, 500,1000,3000,4000, len(INPUT_TILE_NUM_LIST)]:
                print(f"DPS run num: {DPS_num}, tile num: {INPUT_TILE_NUM}, job info: {submit_result}") 
                print(f"DPS job status: {maap.getJobStatus(submit_result.get('job_id')) }" )
 

DPS run num: 1, tile num: 1924, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '1dcb20c3-bae8-43c1-8eb3-28721b1b1d01'}
DPS job status: <Response [200]>
DPS run num: 10, tile num: 274, job info: {'status': 'success', 'http_status_code': 200, 'job_id': 'f5f56a9c-0849-4b7e-9bcb-cd7d95033759'}
DPS job status: <Response [200]>
DPS run num: 50, tile num: 3580, job info: {'status': 'success', 'http_status_code': 200, 'job_id': 'd1aab9cc-f3ca-47d2-aff9-ba3bdbb78a80'}
DPS job status: <Response [200]>


## Get another list just of missing tiles

In [4]:
need_tindex_master = pd.read_csv('/projects/my-public-bucket/DPS_tile_lists/Need_tindex_master.csv')
print(len(need_tindex_master))

INPUT_TILE_NUM_LIST = need_tindex_master.tile_num.tolist()

# Remove duplicate tile nums
INPUT_TILE_NUM_LIST = list(set(INPUT_TILE_NUM_LIST))

len(INPUT_TILE_NUM_LIST)

51


50

In [5]:
len(atl08_filt_tindex_master)

tile_index_missing = atl08_filt_tindex_master[atl08_filt_tindex_master['tile_num'].isin(INPUT_TILE_NUM_LIST)]
print(f'Missing: \t\t{len(tile_index_missing)}')

Missing: 		51


# Run tests of tile and mapBoreal?

#### 1. Run the first part (tile_atl08.py) in LDs workspace (COGs are accessed from nathan's private bucket using the s3 libs and s3 paths)
#### 2. Copy COGs locally so they are read by mapBoreal.R

In [10]:
# Get s3 with a tile_num
INPUT_TILE_NUM_LIST = [269] #[2891, 2831, 885]

for INPUT_TILE_NUM in INPUT_TILE_NUM_LIST:
    

    #in_atl08_s3 =  atl08_filt_tindex_master['s3'].loc[atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
    in_topo_fn = topo_tindex_master['local_path'].loc[topo_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
    in_landsat_fn = landsat_tindex_master['local_path'].loc[landsat_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
    #print(in_atl08_s3)
    print(in_topo_fn)
    print(in_landsat_fn)

    to_dir_cog = '/projects/my-public-bucket/in_stacks_copy'
    in_topo_fn_local_copy = os.path.join(to_dir_cog, os.path.basename(in_topo_fn)) 
    in_landsat_fn_local_copy = os.path.join(to_dir_cog, os.path.basename(in_landsat_fn)) 
    print(os.path.join(to_dir_cog, os.path.basename(in_topo_fn)))

    if True:
        # This copy is run in 'nathanmthomas' workspace 
        if not os.path.exists(to_dir_cog):
            os.makedirs(to_dir_cog)
        copy(in_topo_fn, os.path.join(to_dir_cog, os.path.basename(in_topo_fn)))
        copy(in_landsat_fn, os.path.join(to_dir_cog, os.path.basename(in_landsat_fn))) 

    # This is how to access those copied COGs from 'lduncanson' workspace
    in_topo_fn_local_copy = in_topo_fn_local_copy.replace('my-public-bucket','shared-buckets/nathanmthomas')
    in_landsat_fn_local_copy = in_landsat_fn_local_copy.replace('my-public-bucket','shared-buckets/nathanmthomas')
    print(f'/projects/icesat2_boreal/dps/alg_3-4-test/test_tiles.sh {INPUT_TILE_NUM} {in_topo_fn_local_copy} {in_landsat_fn_local_copy}')

/projects/my-private-bucket/dps_output/do_topo_stack_3-1-5_ubuntu/ops/2021/09/15/19/08/46/342487/Copernicus_269_covars_cog_topo_stack.tif
/projects/my-private-bucket/dps_output/do_landsat_stack_3-1-2_ubuntu/ops/2021/09/14/19/54/44/275704/Landsat8_269_comp_cog_2015-2020_dps.tif
/projects/my-public-bucket/in_stacks_copy/Copernicus_269_covars_cog_topo_stack.tif
/projects/icesat2_boreal/dps/alg_3-4-test/test_tiles.sh 269 /projects/shared-buckets/nathanmthomas/in_stacks_copy/Copernicus_269_covars_cog_topo_stack.tif /projects/shared-buckets/nathanmthomas/in_stacks_copy/Landsat8_269_comp_cog_2015-2020_dps.tif


In [11]:
atl08_test = pd.read_csv('s3://maap-ops-workspace/lduncanson/dps_output/run_tile_atl08_ubuntu/master/2021/09/25/08/37/36/740304/atl08_004_30m_filt_topo_landsat_20210925_0009.csv')
len(atl08_test)

2388