In [1]:
from maap.maap import MAAP
# Client bound to the MAAP ops API host; later cells call maap.submitJob and maap.getJobStatus on it.
maap = MAAP(maap_host='api.ops.maap-project.org')

# Launch DPS for mapBoreal.R

In [2]:
import os
import geopandas
import pandas as pd
import json

def local_to_s3(url, user='lduncanson'):
    """Rewrite a workspace-local path as an ``s3://`` URL.

    Every occurrence of ``/projects/my-private-bucket`` in ``url`` is swapped
    for ``s3://maap-ops-workspace/<user>``. A string that does not contain
    that fragment is returned unchanged.
    """
    local_root = '/projects/my-private-bucket'
    bucket_root = f's3://maap-ops-workspace/{user}'
    return url.replace(local_root, bucket_root)
def local_to_https(url, user='lduncanson'):
    """Rewrite a workspace-local path as an S3 console https URL.

    Every occurrence of ``/projects/my-private-bucket`` in ``url`` is swapped
    for ``https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/<user>``.
    A string that does not contain that fragment is returned unchanged.
    """
    local_root = '/projects/my-private-bucket'
    console_root = f'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/{user}'
    return url.replace(local_root, console_root)
def local_to_https_uswest2(url, user='lduncanson'):
    """Rewrite a workspace-local path as a us-west-2 bucket https URL.

    Every occurrence of ``/projects/my-private-bucket`` in ``url`` is swapped
    for ``https://maap-ops-workspace.s3.us-west-2.amazonaws.com/<user>``.
    A string that does not contain that fragment is returned unchanged.
    """
    local_root = '/projects/my-private-bucket'
    regional_root = f'https://maap-ops-workspace.s3.us-west-2.amazonaws.com/{user}'
    return url.replace(local_root, regional_root)

  shapely_geos_version, geos_capi_version_string


In [3]:
# Load the three master tile indexes (one row per tile product) from the shared bucket.
atl08_filt_tindex_master = pd.read_csv('s3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_filt_tindex_master.csv')
topo_tindex_master = pd.read_csv('s3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv')
landsat_tindex_master = pd.read_csv('s3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv')

# Add an 'https' column holding the us-west-2 https URL for every local_path.
# Each index is paired with the workspace user whose bucket prefix its files live under.
for tindex_master, owner in [
    (atl08_filt_tindex_master, 'lduncanson'),
    (topo_tindex_master, 'nathanmthomas'),
    (landsat_tindex_master, 'nathanmthomas'),
]:
    tindex_master['https'] = [
        local_to_https_uswest2(local_path, user=owner)
        for local_path in tindex_master['local_path']
    ]

# Use the ATL08 filtered tindex master list to determine which tiles to run

In [7]:
# Small hand-picked set of tiles used for test submissions
TEST_INPUT_TILE_NUM_LIST = [42, 199, 200, 223, 224, 386, 387, 11, 49, 111, 112, 131]

# Every tile number in the ATL08 filtered index, as plain Python ints (duplicates included)
INPUT_TILE_NUM_LIST = atl08_filt_tindex_master['tile_num'].values.astype(int).tolist()
print(len(INPUT_TILE_NUM_LIST))

# Collapse to unique tile numbers
INPUT_TILE_NUM_LIST = list(set(INPUT_TILE_NUM_LIST))

print(TEST_INPUT_TILE_NUM_LIST)
print(len(INPUT_TILE_NUM_LIST))

# For the test run, the full list is replaced by the test tiles
# (both names now refer to the same list object)
INPUT_TILE_NUM_LIST = TEST_INPUT_TILE_NUM_LIST
print(len(INPUT_TILE_NUM_LIST))


3968
[42, 199, 200, 223, 224, 386, 387, 11, 49, 111, 112, 131]
3940
12


In [5]:
# Sanity check: fetch the https URL of each input for the first tile in the run list.
# .tolist()[0] takes the first matching row and raises IndexError when the tile is absent.
in_atl08_https = atl08_filt_tindex_master.loc[
    atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0], 'https'
].tolist()[0]
in_topo_https = topo_tindex_master.loc[
    topo_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0], 'https'
].tolist()[0]
in_landsat_https = landsat_tindex_master.loc[
    landsat_tindex_master['tile_num'] == INPUT_TILE_NUM_LIST[0], 'https'
].tolist()[0]
print(in_atl08_https, in_topo_https, in_landsat_https, sep='\n')

https://maap-ops-workspace.s3.us-west-2.amazonaws.com/lduncanson/dps_output/run_tile_atl08_ubuntu/master/2021/10/11/23/50/48/232289/atl08_004_30m_filt_topo_landsat_20211011_0042.csv
https://maap-ops-workspace.s3.us-west-2.amazonaws.com/nathanmthomas/dps_output/do_topo_stack_3-1-5_ubuntu/ops/2021/09/15/18/18/44/208345/Copernicus_42_covars_cog_topo_stack.tif
https://maap-ops-workspace.s3.us-west-2.amazonaws.com/nathanmthomas/dps_output/do_landsat_stack_3-1-2_ubuntu/ops/2021/09/14/19/10/42/604491/Landsat8_42_comp_cog_2015-2020_dps.tif


## Get files for boreal biomass models & boreal wide sample


In [6]:
# URL of the biomass models archive.
# NOTE(review): this is an S3 *console* URL (s3.console.aws.amazon.com) and its path contains
# 'my-private-bucket', unlike the direct bucket endpoint used for train_data_https below.
# Confirm it is actually downloadable. It is not referenced by the job parameters built in
# the DPS submission cell of this notebook.
bio_models_https = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/lduncanson/my-private-bucket/bio_models.tar'

# URL of the training sample CSV; passed to the job as 'in_atl08_sample_url'.
train_data_https = 'https://maap-ops-workspace.s3.us-west-2.amazonaws.com/lduncanson/boreal_train_data_v3.csv'

## Run a DPS job

In [8]:
RUN_DPS  = True
# Set to True to only print the tiles that would be submitted, without calling the DPS.
DRY_RUN = False

# Submission counts at which job info and status are echoed (the final submission is
# always echoed too), so a run over thousands of tiles does not flood the cell output.
PROGRESS_CHECKPOINTS = {1, 5, 10, 15, 20, 25, 50, 75, 100, 200, 500, 1000, 2000, 3000, 3500, 4000}

if RUN_DPS:
    n_tiles = len(INPUT_TILE_NUM_LIST)

    # Submit one DPS job per tile; DPS_num is the 1-based submission counter.
    for DPS_num, INPUT_TILE_NUM in enumerate(INPUT_TILE_NUM_LIST, start=1):

        # Look up the https URL of each input for this tile.
        # .tolist()[0] takes the first match and raises IndexError if the tile is missing from an index.
        in_atl08_https =  atl08_filt_tindex_master['https'].loc[atl08_filt_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
        in_topo_https = topo_tindex_master['https'].loc[topo_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]
        in_landsat_https = landsat_tindex_master['https'].loc[landsat_tindex_master['tile_num'] == INPUT_TILE_NUM].tolist()[0]

        if DRY_RUN:
            print(f"Tile num: {INPUT_TILE_NUM}")
            continue

        # Job parameters: each '*_url' is the remote file and the matching '*_fn' is
        # 'input/<basename of that URL>'.
        in_param_dict = {
            'in_atl08_fn': f"input/{os.path.basename(in_atl08_https)}",
            'in_topo_fn': f"input/{os.path.basename(in_topo_https)}",
            'in_landsat_fn': f"input/{os.path.basename(in_landsat_https)}",
            'in_atl08_fn_url': in_atl08_https,
            'in_topo_fn_url': in_topo_https,
            'in_landsat_fn_url': in_landsat_https,
            'DO_SLOPE_VALID_MASK': 'TRUE',
            'in_atl08_sample_fn':f"input/{os.path.basename(train_data_https)}",
            'in_atl08_sample_url':train_data_https,
            'in_tile_num': INPUT_TILE_NUM,
            'in_tile_fn_url': 'https://maap-ops-workspace.s3.amazonaws.com/shared/nathanmthomas/boreal_grid_albers90k_gpkg.gpkg',
            'in_tile_fn': 'input/boreal_grid_albers90k_gpkg.gpkg'
        }

        submit_result = maap.submitJob(
            identifier='run_boreal_biomass',
            algo_id='run_boreal_biomass_ubuntu',
            version='master',
            username='lduncanson', # username needs to be the same as whoever created the workspace
            queue='maap-dps-worker-32gb',
            **in_param_dict
        )

        if DPS_num in PROGRESS_CHECKPOINTS or DPS_num == n_tiles:
            print(f"DPS run num: {DPS_num}, tile num: {INPUT_TILE_NUM}, job info: {submit_result}") 
            # NOTE(review): the recorded output shows this prints the response object's repr
            # ("<Response [200]>"), not a job status string. Consider printing the response body.
            print(f"DPS job status: {maap.getJobStatus(submit_result.get('job_id')) }" )
 

DPS run num: 1, tile num: 42, job info: {'status': 'success', 'http_status_code': 200, 'job_id': 'c5d82f82-141d-4fdb-bbc3-f3e5d0d4c6b6'}
DPS job status: <Response [200]>
DPS run num: 5, tile num: 224, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '5f645b14-31f2-40a9-a4e8-63cae2e634fa'}
DPS job status: <Response [200]>
DPS run num: 10, tile num: 111, job info: {'status': 'success', 'http_status_code': 200, 'job_id': 'bf13176d-28f7-4d77-b542-ad496b930080'}
DPS job status: <Response [200]>
DPS run num: 12, tile num: 131, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '46a7c255-614e-46a9-86ec-d9776ebc4474'}
DPS job status: <Response [200]>


## Get other lists just of missing tiles (various ways)

In [26]:
# Tile index read from Need_AGB_tindex_master.csv in the public bucket
need_tindex_master = pd.read_csv('/projects/my-public-bucket/DPS_tile_lists/Need_AGB_tindex_master.csv')
print(len(need_tindex_master))

# Unique tile numbers from that index. INPUT_TILE_NUM_LIST_NEED and INPUT_TILE_NUM_LIST
# are bound to the same list object.
INPUT_TILE_NUM_LIST_NEED = INPUT_TILE_NUM_LIST = list(set(need_tindex_master['tile_num'].tolist()))

print(len(INPUT_TILE_NUM_LIST))

127
113


In [1]:
# Manually listed tile numbers; this first list becomes the run list below.
weird_tiles = [4304, 4305, 4221, 4220, 1785, 1718, 1720, 1661, 1257, 1318, 1317]
# NOTE(review): 'werid_tiles' is a different (misspelled) name, so this second list neither
# replaces nor extends weird_tiles and is not used by the run list. Confirm whether that is intended.
werid_tiles = [1255, 1196, 949, 1062, 1063, 1005, 950, 1004]
INPUT_TILE_NUM_LIST = weird_tiles
len(INPUT_TILE_NUM_LIST)

11

In [31]:
# Get all boreal tiles
ATL08_filt_tindex_master =   pd.read_csv('s3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_filt_tindex_master.csv')
boreal_tile_index_path = '/projects/shared-buckets/nathanmthomas/boreal_grid_albers90k_gpkg.gpkg'
boreal_tile_index = geopandas.read_file(boreal_tile_index_path)
boreal_tile_index.rename(columns={"layer":"tile_num"}, inplace=True)
boreal_tile_index["tile_num"] = boreal_tile_index["tile_num"].astype(int)

bad_tiles = [3540,3634,3728,3823,3916,4004] #Dropping the tiles near antimeridian that reproject poorly.
select_needs = [3360,2994,3190,2840,3012,3014,3017,2932,1261,1263,1264,988,978,794, 380,378,411,821,861,
                812,765,764,1308,1302,1469,1406,2495,2883,2965,3321,3509,3510,3327,3335,2976,2906,2907,2894,2814,4253,4293,4403,4440,4408,4372,4477,3986]

# Remove bad tiles
boreal_tile_index = boreal_tile_index[~boreal_tile_index['tile_num'].isin(bad_tiles)]
    
#print(boreal_tile_index.head())
tile_matches_select_needs = boreal_tile_index.merge(ATL08_filt_tindex_master[ATL08_filt_tindex_master['tile_num'].isin(select_needs)], how='right', on='tile_num')
print(len(tile_matches_select_needs))
select_needs_filt = print([t for t in tile_matches_select_needs.tile_num])

45
[978, 988, 978, 988, 1263, 1261, 1264, 794, 2932, 3190, 3012, 3014, 3360, 3017, 378, 765, 812, 821, 764, 861, 1302, 1308, 1469, 2894, 2883, 2965, 2976, 3327, 3321, 3510, 3509, 4372, 4440, 4477, 2994, 1406, 2840, 411, 380, 3335, 2495, 4403, 2906, 2907, 2814]


In [32]:
import numpy as np

# Fallback: hard-coded copy of the tile list printed by the preceding cell. Uncomment if
# select_needs_filt is not a list in the current kernel.
#select_needs_filt = [978, 988, 978, 988, 1263, 1261, 1264, 794, 2932, 3190, 3012, 3014, 3360, 3017, 378, 765, 812, 821, 764, 861, 1302, 1308, 1469, 2894, 2883, 2965, 2976, 3327, 3321, 3510, 3509, 4372, 4440, 4477, 2994, 1406, 2840, 411, 380, 3335, 2495, 4403, 2906, 2907, 2814]

print(len(select_needs_filt))

# Tiles requested in select_needs that have no row in the ATL08 filtered index
tile_nums_missing_but_wont_run = np.setdiff1d(select_needs, select_needs_filt)
print(len(tile_nums_missing_but_wont_run))
print(tile_nums_missing_but_wont_run)

# Tiles in the Need_AGB list that are not among the matched select_needs tiles
# NOTE(review): the recorded output includes 3540, 3634, 3728, 3823, 3916 and 4004, which an
# earlier cell lists as bad_tiles. Confirm whether they should be excluded before submitting.
tile_nums_missing_periphery = np.setdiff1d(INPUT_TILE_NUM_LIST_NEED, select_needs_filt)
print(len(tile_nums_missing_periphery))
print(tile_nums_missing_periphery)

# Keep the run list a plain list of Python ints, as everywhere else in the notebook,
# so np.int64 values are not passed on as the 'in_tile_num' job parameter.
INPUT_TILE_NUM_LIST = tile_nums_missing_periphery.tolist()
len(INPUT_TILE_NUM_LIST)

45
4
[3986 4253 4293 4408]
70
[  65  106  127  250  361  417  493  566  568  618  741  753  786  798
  801  830  848  876  877  927 1273 1287 1318 1402 1438 1439 1440 1604
 1848 1850 1852 1936 2425 2426 2952 3124 3355 3540 3542 3634 3728 3823
 3845 3846 3895 3916 4004 4007 4018 4083 4176 4204 4250 4256 4307 4325
 4342 4395 4417 4444 4450 4451 4463 4467 4468 4483 4499 4510 4527 4534]


70

In [18]:
# Manual override: replace the run list with three specific tiles (all three also appear in select_needs).
INPUT_TILE_NUM_LIST = [4440,4372,4477]