# Launch DPS for tile_atl08.py

In [4]:
import os
import geopandas
import pandas as pd
from maap.maap import MAAP
# MAAP API client; used later in this notebook to submit DPS jobs (maap.submitJob).
maap = MAAP()

In [6]:
def get_stack_fn(stack_list_fn, in_tile_num):
    """Return the first stack path listed for a tile, or None if there is none.

    Reads the stack list CSV (e.g. a *tindex_master.csv), keeps the rows whose
    'location' value contains "_<in_tile_num>", prints every matching 'path'
    value, and returns the first of them.

    Args:
        stack_list_fn: Path or file-like object accepted by ``pd.read_csv``.
            The table must provide 'location' and 'path' columns.
        in_tile_num: Tile number to look up; converted with ``str()``.

    Returns:
        The first matching 'path' value, or None when no row matches.
    """
    all_stacks_df = pd.read_csv(stack_list_fn)

    # NOTE(review): this is a substring match, so tile 98 also matches "_986";
    # confirm whether an exact tile-number match is intended.
    tile_token = "_" + str(in_tile_num)
    stack_for_tile = all_stacks_df[all_stacks_df['location'].str.contains(tile_token)]

    stack_paths = stack_for_tile.path.to_list()
    for stack_path in stack_paths:
        print(stack_path)

    # Check for "no match" BEFORE indexing: the previous version indexed [0]
    # first, so a missing tile raised IndexError instead of returning None.
    if not stack_paths:
        return None

    # NOTE(review): the original comment promised the "most recent" stack, but
    # the code simply takes the first listed row -- confirm the CSV ordering.
    return stack_paths[0]

# nmt added: code that returns df of landsat locations and tile number
# This is basically CountOutput.py
def get_stack_df(dps_dir, TYPE, dps_year=None):
    """Walk a DPS output tree and list the output files of one product type.

    The directory that is walked is fixed per product type and year:
    ``/projects/my-private-bucket/dps_output/<algorithm>/ops/<year>/``.

    Args:
        dps_dir: Currently unused; kept so existing callers keep working.
        TYPE: Product selector. A value containing "Landsat", "Topo" or
            "ATL08" picks the algorithm directory and the file-name suffix.
            If several of these substrings occur, the last one listed wins.
        dps_year: Year sub-directory to walk. When None (the default) the
            value is read from the global ``args.dps_year``, as before.

    Returns:
        A DataFrame with columns 'location' (file path) and 'tile_num' (the
        second '_'-separated token of the file name). For "ATL08" only
        'location' is filled and 'tile_num' is left missing.

    Raises:
        ValueError: If TYPE contains none of the known product names.
    """
    algo_dir = None
    ends_with_str = None
    if "Landsat" in TYPE:
        algo_dir, ends_with_str = "do_landsat_stack_3-1-2_ubuntu", "_dps.tif"
    if "Topo" in TYPE:
        algo_dir, ends_with_str = "do_topo_stack_3-1-5_ubuntu", "_stack.tif"
    if "ATL08" in TYPE:
        algo_dir, ends_with_str = "run_extract_ubuntu", "0m.csv"
    if algo_dir is None:
        # Previously this fell through to an UnboundLocalError on `root`.
        raise ValueError(
            f"Unknown TYPE {TYPE!r}: expected it to contain 'Landsat', 'Topo' or 'ATL08'"
        )

    # Fall back to the global `args` only when the caller gave no year.
    year = args.dps_year if dps_year is None else dps_year
    root = f"/projects/my-private-bucket/dps_output/{algo_dir}/ops/{year}/"

    is_atl08 = "ATL08" in TYPE

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for dirpath, _subdirs, files in os.walk(root):
        for fname in files:
            if not fname.endswith(ends_with_str):
                continue
            # Same path construction as before, so stored locations are unchanged.
            location = os.path.join(dirpath + "/", fname)
            if is_atl08:
                rows.append({'location': location})
            else:
                rows.append({'location': location, 'tile_num': fname.split('_')[1]})

    return pd.DataFrame(rows, columns=['location', 'tile_num'])

# Master tile-index CSVs for the Topo and Landsat stacks (from CountOutput.py)
topo_tindex = (
    "/projects/my-public-bucket/DPS_tile_lists/"
    "Topo_tindex_master.csv"
)
landsat_tindex = (
    "/projects/my-public-bucket/DPS_tile_lists/"
    "Landsat_tindex_master.csv"
)

# Model-ready subset: tiles for which both a Topo and a Landsat stack exist
model_ready_tiles_topo = (
    "/projects/my-public-bucket/DPS_tile_lists/"
    "model_ready_tiles_topo_paths.csv"
)
model_ready_tiles_landsat = (
    "/projects/my-public-bucket/DPS_tile_lists/"
    "model_ready_tiles_landsat_paths.csv"
)

## Get df's from tindex_master csvs for Topo and Landsat tiles

In [5]:
# Use the saved tindex CSVs when both exist; if either one is missing,
# rebuild both tables by scanning the DPS output tree with get_stack_df.
if not os.path.isfile(landsat_tindex) or not os.path.isfile(topo_tindex):
    s3_stem = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas'
    local_stem = '/projects/my-private-bucket'

    ls8_root = f"{s3_stem}/dps_output/do_landsat_stack_3-1-2_ubuntu"
    topo_root = f"{s3_stem}/dps_output/do_topo_stack_3-1-5_ubuntu"

    # One table per product type, Landsat first and then Topo.
    ls8_df = get_stack_df(ls8_root, "Landsat")
    topo_df = get_stack_df(topo_root, "Topo")
else:
    ls8_df = pd.read_csv(landsat_tindex)
    topo_df = pd.read_csv(topo_tindex)

## Get tile nums for coincident Topo and Landsat tiles

In [7]:
# added by nmt: get filenames of coincident Landsat and Topo tiles

# Pair every Landsat row with every Topo row that has the same tile_num.
# One inner merge replaces the nested O(n*m) loop, which also relied on
# DataFrame.append (removed in pandas 2.0, and a full copy per appended row).

# Remember each row's original position so the output keeps the loop's order:
# Landsat order first, then Topo order within each Landsat row.
ls8_side = ls8_df[['location', 'tile_num']].assign(ls8_row=range(len(ls8_df)))
topo_side = topo_df[['location', 'tile_num']].assign(topo_row=range(len(topo_df)))

# Missing tile numbers never compared equal in the loop (NaN != NaN), whereas
# merge would pair NaN keys with each other, so drop them before joining.
# NOTE(review): assumes both tile_num columns have comparable dtypes (both
# read from CSV or both built by get_stack_df) -- confirm.
matched = (
    ls8_side.dropna(subset=['tile_num'])
    .merge(
        topo_side.dropna(subset=['tile_num']),
        on='tile_num',
        how='inner',
        suffixes=('_ls8', '_topo'),
    )
    .sort_values(['ls8_row', 'topo_row'])
    .reset_index(drop=True)
)

ls8_sub_df = matched[['location_ls8', 'tile_num']].rename(columns={'location_ls8': 'location'})
topo_sub_df = matched[['location_topo', 'tile_num']].rename(columns={'location_topo': 'location'})

# Only the Landsat table is cast to float here, as before; the Topo tile
# numbers keep the dtype they arrived with.
ls8_sub_df['tile_num'] = ls8_sub_df['tile_num'].astype(float, errors = 'raise')
print(ls8_sub_df.head())
print(topo_sub_df.head())
print(len(ls8_sub_df),len(topo_sub_df))

# Persist the model-ready (coincident) tile lists.
topo_sub_df.to_csv( model_ready_tiles_topo, index=False, encoding='utf-8-sig')
ls8_sub_df.to_csv( model_ready_tiles_landsat, index=False, encoding='utf-8-sig')

                                            location  tile_num
0  /projects/my-private-bucket/dps_output/do_land...     986.0
1  /projects/my-private-bucket/dps_output/do_land...     987.0
2  /projects/my-private-bucket/dps_output/do_land...     979.0
3  /projects/my-private-bucket/dps_output/do_land...     984.0
4  /projects/my-private-bucket/dps_output/do_land...     982.0
                                            location tile_num
0  /projects/my-private-bucket/dps_output/do_topo...      986
1  /projects/my-private-bucket/dps_output/do_topo...      987
2  /projects/my-private-bucket/dps_output/do_topo...      979
3  /projects/my-private-bucket/dps_output/do_topo...      984
4  /projects/my-private-bucket/dps_output/do_topo...      982
4465 4465


In [18]:
# Tile numbers of the coincident tiles, as plain Python ints, in table order.
tile_num_values = topo_sub_df['tile_num'].values
INPUT_TILE_NUM_LIST = tile_num_values.astype(int).tolist()
# Preview the first five tile numbers.
INPUT_TILE_NUM_LIST[:5]

[986, 987, 979, 984, 982]

## Run a single DPS job to test

#### TODO: the chunk below will be wrangled into shape by LD and PM

In [None]:
#"https://maap-ops-workspace.s3.amazonaws.com.com/lduncanson"
#s3_stem = 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/nathanmthomas'
#local_stem = '/projects/my-private-bucket'

if RUN_DPS:
    ##################################
    # Test DPS submission on a single tile: the first coincident tile number.
    INPUT_TILE_NUM = INPUT_TILE_NUM_LIST[0]

    # TODO(LD/PM): set this to the CSV list that every DPS run should
    # reference. It is meant to be created once, earlier in the notebook, but
    # nothing creates it yet. The original dict entry had no value at all
    # (a syntax error), so fail loudly rather than submit an incomplete job.
    CSV_LIST_FN = None
    if CSV_LIST_FN is None:
        raise ValueError(
            "csv_list_fn is not set: create the CSV list once and assign its "
            "path to CSV_LIST_FN before submitting DPS jobs"
        )

    # Parameters passed as keyword arguments to the run_tile_atl08_ubuntu job.
    in_param_dict = {
        'in_tile_num': INPUT_TILE_NUM,
        'in_tile_fn': "/projects/my-public-bucket/boreal_grid_albers90k_gpkg.gpkg",
        'in_tile_layer': "grid_boreal_albers90k_gpkg",
        'csv_list_fn': CSV_LIST_FN,
        'topo_stack_list_fn': "/projects/my-public-bucket/DPS_tile_lists/Topo_tindex_master.csv",
        # Quoting fixed: the key, value and trailing comma used to be fused
        # into a single string literal, which made the dict invalid.
        'landsat_stack_list_fn': "/projects/my-public-bucket/DPS_tile_lists/Landsat_tindex_master.csv",
    }

    submit_result = maap.submitJob(
        identifier='run_tile_atl08_ubuntu',
        algo_id='run_tile_atl08_ubuntu',
        version='master',
        username='lduncanson', # username needs to be the same as whoever created the workspace
        queue='maap-dps-worker-8gb',
        **in_param_dict
    )

    print(submit_result)
    

## DPS over input tiles to output ATL08 tile CSVs