# Use DPS to export Google Earth Engine assets to MAAP

In [None]:
import ee
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import glob
import os
from shapely.geometry import Polygon
from shapely.geometry import box
import matplotlib
import matplotlib.pyplot as plt
import rasterio as rio

In [2]:
# Interactive Earth Engine sign-in; per the output below it prompts for a
# verification code and then saves an authorization token.
ee.Authenticate()

Enter verification code:  <redacted>



Successfully saved authorization token.


In [3]:
!cp $HOME/.config/earthengine/credentials /$HOME/shared-buckets/nathanmthomas/GEE/

In [4]:
# HTTPS (S3) location of the Earth Engine credentials file.
# NOTE(review): presumably the same file copied to the shared bucket in the
# previous cell; confirm this object is not world-readable, since it holds an
# authorization token.
creds_file = 'https://maap-ops-workspace.s3.amazonaws.com/nathanmthomas/GEE/credentials'

## Steps to commit, build DPS registration yaml, register DPS algorithm from yaml
### Commit with Tag for running
1) Set the version name as the *tag_name* (e.g. `EXPORT_GEE_v1`, or whatever is appropriate) in both this notebook and the algorithm config yaml

2) follow git instructions (every time!!):  
 - git add changes  
 - git commit -m 'message'  
 - git tag -f `EXPORT_GEE_v1`    
 - git push  
 - git push origin -f `EXPORT_GEE_v1`  
        # --push to gitlab no longer needed --   
        # git push dps    
        # git push dps -f `EXPORT_GEE_v1`    

3) If anything looks off, check `git log` to make sure the tag points to the same commit locally and on origin (and on dps, if that remote is still used)

### Build DPS Registration yaml  
- use MAAP Register Algorithm tool  
- refer to `code/icesat2_boreal/dps/registered/do_export_gee_to_maap.yml` for existing template  
- update reg yaml repository url to the github url (gitlab no longer needed) 
- update `above_env.yml` and `build_command_main.sh` to include all pinned versions of packages - to ensure stability of env for this alg  

# Register algorithm

### Use Registration yaml: Register DPS algorithm
 - We need to register the DPS algorithm before proceeding to the chunks below...

In [152]:
# Register the DPS algorithm described by the registration yaml and display the
# text of the response (a JSON string, per the output below).
# NOTE(review): `maap` is not created in the cells shown here - presumably a MAAP
# client object from earlier in the session; confirm it exists before running.
# NOTE(review): the captured response below names "do_HLS_stack_3-1-2"; confirm
# the yaml registers the intended algorithm.
maap.register_algorithm_from_yaml_file("/projects/code/icesat2_boreal/dps/registered/do_export_gee_to_maap.yml").text

'{"code": 200, "message": {"id": "1042d2d1484db380c5bcd3640e1929f9e1f4c309", "short_id": "1042d2d1", "created_at": "2023-07-19T21:12:02.000+00:00", "parent_ids": ["d4dde57d3c81d02acdf1634a9a5308fea33cc542"], "title": "Registering algorithm: do_HLS_stack_3-1-2", "message": "Registering algorithm: do_HLS_stack_3-1-2", "author_name": "root", "author_email": "root@a69d8486d28b", "authored_date": "2023-07-19T21:12:02.000+00:00", "committer_name": "root", "committer_email": "root@a69d8486d28b", "committed_date": "2023-07-19T21:12:02.000+00:00", "trailers": {}, "web_url": "https://repo.maap-project.org/root/register-job-hysds-v4/-/commit/1042d2d1484db380c5bcd3640e1929f9e1f4c309", "stats": {"additions": 0, "deletions": 0, "total": 0}, "status": "pending", "project_id": 3, "last_pipeline": {"id": 515, "iid": 59, "project_id": 3, "sha": "1042d2d1484db380c5bcd3640e1929f9e1f4c309", "ref": "main", "status": "pending", "source": "push", "created_at": "2023-07-19T21:12:03.205Z", "updated_at": "2023-0

In [310]:
# MAAP algorithm version name; passed as `version=` to maap.submitJob in the
# submission cell below. The value matches the git tag used in the commit
# instructions above.
MAAP_VERSION = "EXPORT_GEE_v1"

In [311]:
%%time
submit_results_df_list = []
len_input_list = len(DPS_INPUT_TILE_NUM_LIST)
print(f"# of input tiles for DPS: {len_input_list}")

for i, INPUT_TILE_NUM in enumerate(DPS_INPUT_TILE_NUM_LIST):

    DPS_num = i+1
    IDENTIFIER = IDENTIFIER 
    ALGO_ID = "do_HLS_stack_3-1-2"
    USER = 'montesano'
    WORKER_TYPE = 'maap-dps-worker-32gb'
    
    in_param_dict = {
                         'in_tile_fn': 'https://maap-ops-workspace.s3.amazonaws.com/shared/nathanmthomas/boreal_tiles_v003.gpkg',
                         'in_tile_num': INPUT_TILE_NUM,
                         'in_tile_layer': 'boreal_tiles_v003',
                         'sat_api': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD',
                        #'sat_api': 'https://landsatlook.usgs.gov/sat-api',
                         'tile_buffer_m': 0,
                         'start_year': YEAR_START,
                         'end_year': YEAR_STOP,
                         'start_month_day': SEASON_START,
                         'end_month_day': SEASON_STOP,
                         'max_cloud': MAX_CLOUDS,
                         'composite_type': 'HLS',
                         'shape': 3000,
                         'hls_product': 'H30'
        }
        
    submit_result = maap.submitJob(
                                    identifier=IDENTIFIER,
                                    algo_id=ALGO_ID,
                                    version=MAAP_VERSION, # "HLS_stack_2023_v1"
                                    username=USER,
                                    queue=WORKER_TYPE,
                                    # Args that match yaml
                                    **in_param_dict
        )
    
    # Build a dataframe of submission details
    submit_result_df = pd.DataFrame( 
        {
                'dps_num':[DPS_num],
                'tile_num':[INPUT_TILE_NUM],
                'submit_time':[datetime.datetime.now()],
                'dbs_job_hour': [datetime.datetime.now().hour],
                'algo_id': [ALGO_ID],
                'user': [USER],
                'worker_type': [WORKER_TYPE],
                'job_id': [submit_result.id],
                'submit_status': [submit_result.retrieve_status()],
            
        } 
    )
    
    # Append to a list of data frames of submission results
    submit_results_df_list.append(submit_result_df)
    
    if DPS_num in [1, 5, 10, 50, 100, 250, 500, 750, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 7000, 9000, 11000, 13000, 15000, 17000, 19000, 21000, 24000, len_input_list]:
        print(f"DPS run #: {DPS_num}\t| tile num: {INPUT_TILE_NUM}\t| submit status: {submit_result.retrieve_status()}\t| job id: {submit_result.id}") 
        
# Build a final submission results df and save
submit_results_df = pd.concat(submit_results_df_list)
submit_results_df['run_name'] = RUN_NAME
nowtime = pd.Timestamp.now().strftime('%Y%m%d%H%M')
print(f"Current time:\t{nowtime}")
submit_results_df.to_csv(f'/projects/my-public-bucket/dps_submission_results/DPS_{ALGO_ID}_{RUN_NAME}_submission_results_{len_input_list}_{nowtime}.csv')
submit_results_df.info()

# of input tiles for DPS: 192
DPS run #: 1	| tile num: 27100	| submit status: Accepted	| job id: 7a975ff5-8a83-4cfe-a77e-90177b45cdc3
DPS run #: 5	| tile num: 2739	| submit status: Accepted	| job id: 1c824be6-5256-43f5-bdde-7c8b77fb8a9d
DPS run #: 10	| tile num: 3239	| submit status: Accepted	| job id: 990d3003-af89-44b5-ae3f-0ad18cd7f472
DPS run #: 50	| tile num: 2735	| submit status: Accepted	| job id: 94016890-b884-411b-a444-2a8426903565
DPS run #: 100	| tile num: 567	| submit status: Accepted	| job id: 95705d70-32d2-46f0-886d-17db3267acf0
DPS run #: 192	| tile num: 353	| submit status: Accepted	| job id: 76205ff7-ea68-4009-a308-f78d072d5f3b
Current time:	202308101721
<class 'pandas.core.frame.DataFrame'>
Index: 192 entries, 0 to 0
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   dps_num        192 non-null    int64         
 1   tile_num       192 non-null    int64         
 2   submit_time  