In [1]:
from maap.maap import MAAP
# MAAP API client pointed at the ops host; reused further down to submit DPS jobs.
maap = MAAP(maap_host='api.ops.maap-project.org')

# Launch DPS for tile_atl08.py

In [1]:
import os
import geopandas
import pandas as pd

  shapely_geos_version, geos_capi_version_string


In [3]:
def get_stack_fn(stack_list_fn, in_tile_num):
    """Return the first stack path listed for a tile, or None when the tile has no stack.

    Parameters
    ----------
    stack_list_fn : str
        Path/URL of a CSV readable by ``pandas.read_csv`` (e.g. a *tindex_master.csv)
        that has at least the columns 'location' and 'path'.
    in_tile_num : int or str
        Tile number. Rows are selected when 'location' contains ``"_<in_tile_num>"``.

    Returns
    -------
    str or None
        The 'path' value of the first matching row, or None if no row matches.

    Notes
    -----
    Every matching path is printed as a side effect.
    NOTE(review): the first listed match is returned; it is only the "most recent"
    stack if the CSV is ordered that way -- confirm.
    NOTE(review): the match is a substring test, so tile 42 also matches "_421";
    confirm whether file names guarantee this cannot happen.
    """
    all_stacks_df = pd.read_csv(stack_list_fn)

    # na=False keeps rows with a missing 'location' from poisoning the boolean mask.
    is_tile = all_stacks_df['location'].str.contains("_" + str(in_tile_num), na=False)
    candidate_paths = all_stacks_df.loc[is_tile, 'path'].to_list()

    for candidate in candidate_paths:
        print(candidate)

    # Check for "no match" BEFORE indexing; the original indexed [0] first and
    # raised IndexError instead of returning None.
    if not candidate_paths:
        return None
    return candidate_paths[0]

# nmt added: code that returns df of landsat locations and tile number
# This is basically CountOutput.py
def get_stack_df(dps_dir, TYPE, dps_year=None):
    """List DPS output files of one product type as a DataFrame.

    Parameters
    ----------
    dps_dir : Any
        Unused; kept so existing calls keep working. The search root is chosen
        from ``TYPE`` and ``dps_year`` only.
    TYPE : str
        Must contain "Landsat", "Topo" or "ATL08". If several fragments are
        present the last one in that order wins.
    dps_year : int or str, optional
        Sub-directory of the product's ``ops/`` folder to search. When omitted
        (None) the whole ``ops/`` folder is walked, i.e. every year.

    Returns
    -------
    pandas.DataFrame
        Columns 'location' (file path) and 'tile_num' (second '_'-separated
        token of the file name, as a string). 'tile_num' is left empty (NaN)
        for ATL08 files.

    Raises
    ------
    ValueError
        If ``TYPE`` matches none of the known product types.
    """
    # (fragment looked for in TYPE, DPS output folder, file-name suffix to keep)
    known_products = (
        ("Landsat", "/projects/my-private-bucket/dps_output/do_landsat_stack_3-1-2_ubuntu/ops", "_dps.tif"),
        ("Topo", "/projects/my-private-bucket/dps_output/do_topo_stack_3-1-5_ubuntu/ops", "_stack.tif"),
        ("ATL08", "/projects/my-private-bucket/dps_output/run_extract_ubuntu/ops", "0m.csv"),
    )

    ops_dir = None
    ends_with_str = None
    for fragment, product_dir, suffix in known_products:
        if fragment in TYPE:
            ops_dir, ends_with_str = product_dir, suffix
    if ops_dir is None:
        raise ValueError(
            f"TYPE must contain 'Landsat', 'Topo' or 'ATL08'; got {TYPE!r}"
        )

    year_part = "" if dps_year is None else f"{dps_year}/"
    root = f"{ops_dir}/{year_part}"
    is_atl08 = "ATL08" in TYPE

    # Collect plain records and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every call.
    records = []
    for dirpath, _subdirs, files in os.walk(root):
        for fname in files:
            if not fname.endswith(ends_with_str):
                continue
            # Same path construction as before so stored locations do not change.
            location = os.path.join(dirpath + "/", fname)
            if is_atl08:
                records.append({'location': location})
            else:
                records.append({'location': location, 'tile_num': fname.split('_')[1]})

    return pd.DataFrame(records, columns=['location', 'tile_num'])

# Topo and Landsat tindex_master CSVs (produced by CountOutput.py); read back below
# when both files already exist.
topo_tindex = "/projects/my-public-bucket/DPS_tile_lists/Topo_tindex_master.csv"
landsat_tindex = "/projects/my-public-bucket/DPS_tile_lists/Landsat_tindex_master.csv"

# Model-ready subset of tiles for which Topo and Landsat coincide; these two CSVs are
# written by the tile-matching cell further down.
model_ready_tiles_topo = "/projects/my-public-bucket/DPS_tile_lists/model_ready_tiles_topo_paths.csv"
model_ready_tiles_landsat = "/projects/my-public-bucket/DPS_tile_lists/model_ready_tiles_landsat_paths.csv"

## Load DataFrames from the tindex_master CSVs for Topo and Landsat tiles

In [4]:
# Load the Landsat and Topo tile indexes, preferring the CSVs already on disk.
if os.path.isfile(landsat_tindex) and os.path.isfile(topo_tindex):
    print('Reading existing...')
    ls8_df = pd.read_csv(landsat_tindex)
    topo_df = pd.read_csv(topo_tindex)
else:
    # Fallback: rebuild both indexes with get_stack_df when either CSV is missing.
    # NOTE(review): get_stack_df never reads its first argument; it walks a hard-coded
    # /projects/my-private-bucket/... root chosen from TYPE and dps_year. No dps_year is
    # passed in the calls below, and the helper names its path column 'location' while
    # the tile-matching cell reads 'local_path' -- confirm this branch still works.
    s3_stem = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas'
    local_stem = '/projects/my-private-bucket'  # not used anywhere in this cell

    ls8_root =  s3_stem + '/dps_output/do_landsat_stack_3-1-2_ubuntu'
    topo_root = s3_stem + '/dps_output/do_topo_stack_3-1-5_ubuntu'
    
    ls8_df = get_stack_df(ls8_root, "Landsat")
    topo_df = get_stack_df(topo_root, "Topo")
# Cell output: preview of the Topo index.
topo_df.head()

Reading existing...


Unnamed: 0.1,Unnamed: 0,local_path,tile_num
0,0,/projects/my-private-bucket/dps_output/do_topo...,421
1,1,/projects/my-private-bucket/dps_output/do_topo...,455
2,2,/projects/my-private-bucket/dps_output/do_topo...,456
3,3,/projects/my-private-bucket/dps_output/do_topo...,491
4,4,/projects/my-private-bucket/dps_output/do_topo...,492


## Get tile nums for coincident Topo and Landsat tiles

In [5]:
# added by nmt: get filenames of co-incident landsat and topo
#
# Pair every Landsat stack with every Topo stack that shares its tile number.
# A single inner merge replaces the nested Python loops and the per-row
# DataFrame.append calls (append was removed in pandas 2.0 and is quadratic).
tile_cols = ['local_path', 'tile_num']

paired_df = (
    # Drop rows without a tile number first: the old `==` test never matched NaN,
    # whereas merge would pair NaN keys with each other.
    ls8_df[tile_cols].dropna(subset=['tile_num'])
    .merge(
        topo_df[tile_cols].dropna(subset=['tile_num']),
        on='tile_num',
        how='inner',  # keeps the order of the Landsat rows
        suffixes=('_ls8', '_topo'),
    )
    .assign(tile_num=lambda df: df['tile_num'].astype(int))
)

# Only one list is strictly needed, but both are kept so they can be cross-checked.
ls8_sub_df = paired_df[['local_path_ls8', 'tile_num']].rename(columns={'local_path_ls8': 'local_path'})
topo_sub_df = paired_df[['local_path_topo', 'tile_num']].rename(columns={'local_path_topo': 'local_path'})

print(ls8_sub_df.head())
print(topo_sub_df.head())
print(len(ls8_sub_df),len(topo_sub_df))

topo_sub_df.to_csv( model_ready_tiles_topo, index=False, encoding='utf-8-sig')
ls8_sub_df.to_csv( model_ready_tiles_landsat, index=False, encoding='utf-8-sig')

                                          local_path tile_num
0  /projects/my-private-bucket/dps_output/do_land...      986
1  /projects/my-private-bucket/dps_output/do_land...      987
2  /projects/my-private-bucket/dps_output/do_land...      979
3  /projects/my-private-bucket/dps_output/do_land...      984
4  /projects/my-private-bucket/dps_output/do_land...      982
                                          local_path tile_num
0  /projects/my-private-bucket/dps_output/do_topo...      986
1  /projects/my-private-bucket/dps_output/do_topo...      987
2  /projects/my-private-bucket/dps_output/do_topo...      979
3  /projects/my-private-bucket/dps_output/do_topo...      984
4  /projects/my-private-bucket/dps_output/do_topo...      982
4465 4465


In [6]:
# Re-read the saved model-ready Topo tile list and keep its tile numbers as plain ints.
topo_sub_df = pd.read_csv(
    "/projects/my-public-bucket/DPS_tile_lists/model_ready_tiles_topo_paths.csv"
)
INPUT_TILE_NUM_LIST = topo_sub_df['tile_num'].to_numpy().astype(int).tolist()

# Cell output: number of tiles queued for processing.
len(INPUT_TILE_NUM_LIST)

4465

## Submit a DPS job for each tile

In [13]:
#"https://maap-ops-workspace.s3.amazonaws.com.com/lduncanson"
#s3_stem = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas'
#local_stem = '/projects/my-private-bucket'
# Set to False to skip submission when re-running the notebook.
RUN_DPS = True

if RUN_DPS:
    # Inputs that are identical for every tile; the tile number is put in front per job.
    shared_job_params = {
        'in_tile_fn': 'https://maap-ops-workspace.s3.amazonaws.com/shared/nathanmthomas/boreal_tiles_v002.gpkg',
        'in_tile_layer': 'boreal_tiles_v002',
        'csv_list_fn': 's3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv',
        'topo_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Topo_tindex_master.csv',
        'landsat_stack_list_fn': 's3://maap-ops-workspace/shared/nathanmthomas/DPS_tile_lists/Landsat_tindex_master.csv',
        'user_stacks': 'nathanmthomas',
        'user_atl08': 'lduncanson',
        'thresh_sol_el': 5,
        'v_ATL08': 4,
        'minmonth': 6,
        'maxmonth': 9,
    }

    # Submission counts at which a progress line is printed (the last entry is the final job).
    report_at = (1, 100, 500, 1000, 3000, len(INPUT_TILE_NUM_LIST))

    # One run_tile_atl08 job is submitted for every tile in INPUT_TILE_NUM_LIST.
    for DPS_num, INPUT_TILE_NUM in enumerate(INPUT_TILE_NUM_LIST, start=1):
        in_param_dict = {'in_tile_num': INPUT_TILE_NUM, **shared_job_params}

        submit_result = maap.submitJob(
            identifier='run_tile_atl08',
            algo_id='run_tile_atl08_ubuntu',
            version='master',
            username='lduncanson',  # username needs to be the same as whoever created the workspace
            queue='maap-dps-worker-8gb',
            **in_param_dict
        )

        if DPS_num in report_at:
            print(f"DPS run num: {DPS_num}, tile num: {INPUT_TILE_NUM}, job info: {submit_result}")
 

DPS run num: 1, tile num: 421, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '21ecd574-2803-433d-b150-b5a29d6e36b9'}
DPS run num: 100, tile num: 2201, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '6ba66dea-6e56-4104-99e7-598bd661813a'}
DPS run num: 500, tile num: 178, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '5183f391-d589-439a-92d3-eeddf336136e'}
DPS run num: 1000, tile num: 1873, job info: {'status': 'success', 'http_status_code': 200, 'job_id': '6bf2ec67-c949-48d3-a390-8776e24d9e77'}
DPS run num: 1585, tile num: 4551, job info: {'status': 'success', 'http_status_code': 200, 'job_id': 'd401a296-7369-4e42-9d61-f85eed63f39e'}


In [14]:
# Read the list of tiles that still need filtered ATL08 output.
# Despite its name, need_df_fn holds the loaded DataFrame, not a file name.
need_df_fn = pd.read_csv(
    "/projects/my-public-bucket/DPS_tile_lists/Need_ATL08_filt_tindex_master.csv"
)

# This rebinds INPUT_TILE_NUM_LIST, replacing the model-ready list built earlier.
INPUT_TILE_NUM_LIST = need_df_fn['tile_num'].to_numpy().astype(int).tolist()

# Cell output: number of tiles in the reduced list.
len(INPUT_TILE_NUM_LIST)

499