In [3]:
from maap.maap import MAAP
maap = MAAP()

# Export Google Earth Engine assets to MAAP with DPS

These assets were created and exported by tile via a loop in Google Earth Engine here:  
https://code.earthengine.google.com/?scriptPath=users%2Fmmacander%2Fveg_structure%3Asentinel_1%2Fseasonal_s1_tile_export_v2  
old version:  
https://code.earthengine.google.com/?scriptPath=users%2Fpaulmontesano%2Fdefault%3Aseasonal_s1_tile_export

and can be viewed here:  
https://code.earthengine.google.com/?scriptPath=users%2Fpaulmontesano%2Fdefault%3Aseasonal_s1_view

Here, the composites are created and shown on-the-fly:  
https://code.earthengine.google.com/?scriptPath=users%2Fpaulmontesano%2Fdefault%3Aseasonal_s1_comps

In [4]:
import ee
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import glob
import os
from shapely.geometry import Polygon
from shapely.geometry import box
import matplotlib
import matplotlib.pyplot as plt
import rasterio as rio
import sys
sys.path.append('/projects/code/icesat2_boreal/lib')

import datetime


In [51]:
#!aws s3 rm --recursive s3://maap-ops-workspace/montesano/dps_output/do_gee_download_by_subtile/EXPORT_GEE_v2/SAR_S1_2018/2023

## Authenticate current access to Google Earth Engine
Create a current credentials file, copy it to your private bucket, and note its S3 path so it can be passed to the script.

In [None]:
ee.Authenticate()

In [5]:
USER = 'montesano' #'nathanmthomas'
#!mkdir -p $HOME/my-private-bucket/GEE/

In [6]:
!cp $HOME/.config/earthengine/credentials $HOME/my-private-bucket/

In [7]:
# Build the URL of the credentials file in the user's workspace bucket and display it.
# Presumably this is the file copied to the private bucket in the previous cell - verify.
# NOTE(review): the job-submission cells pass the 's3://maap-ops-workspace/...' form
# of this path rather than this https URL - confirm which form the script expects.
creds_fn = f'https://maap-ops-workspace.s3.amazonaws.com/{USER}/credentials'
creds_fn

'https://maap-ops-workspace.s3.amazonaws.com/montesano/credentials'

## Steps to commit, build DPS registration yaml, register DPS algorithm from yaml
### Commit with Tag for running
1) Add version name as a *tag_name* of `EXPORT_GEE_v1` or whatever is appropriate - both to this notebook and algorithm config yaml

2) follow git instructions (every time!!):  
 - git add changes  
 - git commit -m 'message'  
 - git tag -f `EXPORT_GEE_v2`    
 - git push  
 - git push origin -f `EXPORT_GEE_v2`  
        # --push to gitlab no longer needed --   
        # git push dps    
        # git push dps -f `EXPORT_GEE_v2`    

3) if it looks weird check git log to make sure tag is at same place as origin and dps

### Build DPS Registration yaml  
- use MAAP Register Algorithm tool  
- refer to `code/icesat2_boreal/dps/registered/do_export_gee_to_maap.yml` for existing template  
- update reg yaml repository url to the github url (gitlab no longer needed) 
- update `above_env.yml` and `build_command_main.sh` to include all pinned versions of packages - to ensure stability of env for this alg  

# Register algorithm for DPS

### Use Registration yaml: Register DPS algorithm
 - We need to register the DPS algorithm before proceeding to the chunks below...

In [22]:
# Register the DPS algorithm from its registration yaml; the response text is shown as the cell output.
maap.register_algorithm_from_yaml_file("/projects/code/icesat2_boreal/dps/registered/do_gee_download_by_subtile.yml").text

'{"code": 200, "message": {"id": "01c8d34a4ba81c447a244d892d87eb6e3c727a66", "short_id": "01c8d34a", "created_at": "2023-12-16T21:05:22.000+00:00", "parent_ids": ["12783f87d63b60173b2b2416f1917921de53a6fe"], "title": "Registering algorithm: do_gee_download_by_subtile", "message": "Registering algorithm: do_gee_download_by_subtile", "author_name": "root", "author_email": "root@e49d5cea4b76", "authored_date": "2023-12-16T21:05:22.000+00:00", "committer_name": "root", "committer_email": "root@e49d5cea4b76", "committed_date": "2023-12-16T21:05:22.000+00:00", "trailers": {}, "web_url": "https://repo.maap-project.org/root/register-job-hysds-v4/-/commit/01c8d34a4ba81c447a244d892d87eb6e3c727a66", "stats": {"additions": 0, "deletions": 0, "total": 0}, "status": "created", "project_id": 3, "last_pipeline": {"id": 10627, "iid": 662, "project_id": 3, "sha": "01c8d34a4ba81c447a244d892d87eb6e3c727a66", "ref": "main", "status": "created", "source": "push", "created_at": "2023-12-16T21:05:23.750Z", "u

# DPS test

### Use MAAP gui to View & Submit a single test job
check status here

In [5]:
# MAAP algorithm version name
MAAP_VERSION = "EXPORT_GEE_v4"

In [6]:
%%time
# Parameters for a single test job, spelled out as a dict so each
# setting sits on its own line; order and values match the submitJob call.
test_job_kwargs = {
    'identifier': 'TEST_SAR_S1_2019',
    'algo_id': "do_gee_download_by_subtile",
    'version': MAAP_VERSION,
    'username': "montesano",
    'queue': "maap-dps-worker-8gb",
    'creds_fn': 's3://maap-ops-workspace/montesano/credentials',
    'subtile_loc': "100",
    'id_num': "9",
    'id_col': 'AGG_TILE_NUM',
    'tile_size_m': 500,
    'asset_path': "projects/foreststructure/Circumboreal/S1_triseason_albers_2019",
    # The value is literally two single-quote characters, not an empty string.
    'out_dir': "''",
}

# Submit the job; the returned object exposes .status and .id (inspected in the next cells).
submitted_job_dict = maap.submitJob(**test_job_kwargs)

CPU times: user 18.5 ms, sys: 5.73 ms, total: 24.3 ms
Wall time: 149 ms


In [7]:
submitted_job_dict.status

'success'

In [8]:
submitted_job_dict.id

'd3f8f63d-22d3-44b9-bda4-a90d501d0e71'

In [9]:
print(submitted_job_dict)

{'job_id': 'd3f8f63d-22d3-44b9-bda4-a90d501d0e71', 'status': 'success', 'machine_type': None, 'architecture': None, 'machine_memory_size': None, 'directory_size': None, 'operating_system': None, 'job_start_time': None, 'job_end_time': None, 'job_duration_seconds': None, 'cpu_usage': None, 'cache_usage': None, 'mem_usage': None, 'max_mem_usage': None, 'swap_usage': None, 'read_io_stats': None, 'write_io_stats': None, 'sync_io_stats': None, 'async_io_stats': None, 'total_io_stats': None, 'error_details': None, 'response_code': 200, 'outputs': []}


# DPS
### Fishnet of subtiles needed on the fly for each agg tile

In [131]:
# Import the local helper module and reload it so that edits made to its source
# since the first import are picked up without restarting the kernel.
import importlib
import do_gee_download_by_subtile
importlib.reload(do_gee_download_by_subtile)

<module 'do_gee_download_by_subtile' from '/projects/code/icesat2_boreal/lib/do_gee_download_by_subtile.py'>

In [134]:
CREDS_FN = 's3://maap-ops-workspace/montesano/credentials'

In [135]:
# DPS job settings.
# NOTE(review): MAAP_VERSION and USER repeat values already set in earlier cells;
# IDENTIFIER, ALGO_ID and WORKER_TYPE are not referenced in the cells visible here -
# presumably they feed the DPS submission further down; verify.
MAAP_VERSION = "EXPORT_GEE_v4"
IDENTIFIER = 'TEST_SAR_S1_2019'
ALGO_ID = "do_gee_download_by_subtile"
USER = 'montesano'
WORKER_TYPE = 'maap-dps-worker-8gb'

In [205]:
# Read the boreal tiles and the aggregated (agg12) tiles from the shared bucket.
# AGG_TILES_FN is kept as a name because it is also passed to the local download test.
boreal_tiles = gpd.read_file('/projects/shared-buckets/montesano/databank/boreal_tiles_v004.gpkg')
AGG_TILES_FN = '/projects/shared-buckets/montesano/databank/boreal_tiles_v004_agg12/boreal_tiles_v004_agg12.gpkg'
boreal_tiles_agg_new_updated_dateline = gpd.read_file(AGG_TILES_FN) # created in icesat2_boreal/notebooks/project_aggtiles.ipynb

In [144]:
#### You need ASSET_PATH to get gee asset_df
ASSET_PATH = 'projects/foreststructure/Circumboreal/S1_triseason_albers_2019'

## To set up DPS

### Need the asset df converted to an asset gdf LOCALLY for fishnet gdf creation, so that the tile & subtile combos sent to DPS match what is processed REMOTELY

In [211]:
# Reload the local helper module again so the latest edits to its source are in effect
# (same reload as in the earlier cell).
import importlib
import do_gee_download_by_subtile
importlib.reload(do_gee_download_by_subtile)

<module 'do_gee_download_by_subtile' from '/projects/code/icesat2_boreal/lib/do_gee_download_by_subtile.py'>

In [146]:
#!aws s3 rm --recursive s3://maap-ops-workspace/shared/montesano/local_output/do_gee_download_by_subtile_TEST4

In [147]:
# The asset table tells us which asset tiles to loop over here; the processing
# script also needs it at run time, where it is used to build a fishnet_df.
asset_df_raw = do_gee_download_by_subtile.get_gee_assets(ASSET_PATH)

# Expand each entry of the 'properties' column into its own columns and put them
# next to the remaining columns - this makes the AGG_TILE_NUM field available.
asset_properties = asset_df_raw['properties'].apply(pd.Series)
asset_df = pd.concat([asset_df_raw.drop(columns=['properties']), asset_properties], axis=1)

# Tile id column: upper case in asset_df; the local agg-tile file uses the lower-case name.
TILE_FIELD_NAME = 'AGG_TILE_NUM'
TILE_SIZE_M = 500
GRID_SIZE_M = 30

In [195]:
# Collect each asset's footprint coordinates and its tile number, in row order.
# Iterating the columns directly keeps both lists aligned by position even when
# asset_df does not carry a default 0..n-1 index (a label lookup `[...][i]` would
# then pick the wrong row or raise a KeyError).
input_coordinates_list = [footprint['coordinates'] for footprint in asset_df['system:footprint']]
input_tiles_list = asset_df[TILE_FIELD_NAME].to_list()

# Build one polygon GeoDataFrame per footprint and stack them into a single GeoDataFrame.
# ---- Known problem: the polygons created here have bad coordinates caused by the GEE
#      meridian issue (and possibly also a dateline issue).
asset_gdf = pd.concat(
    [
        do_gee_download_by_subtile.create_polygon_from_coordinates(input_coordinates)
        for input_coordinates in input_coordinates_list
    ],
    ignore_index=True,
)

# Attach the tile numbers as a column, matched to the polygons by position.
asset_gdf = pd.concat([asset_gdf, pd.DataFrame({TILE_FIELD_NAME: input_tiles_list})], axis=1)
asset_gdf.boundary.explore()

### Shows the GEE-derived asset gdf (blue) vs the local updated agg tiles (red)

### `TODO`: fix GEE asset DF to GDF
1. to remotely produce the nice updated agg tiles so subtile generation isn't a mess of extra water tiles...  
2. to not get `EEException: Unable to export unbounded image` errors for tiles at meridian (and anti-meridian?)

In [196]:
# Attempt to repair asset_gdf.
#
# Splitting polygons by the international date line is NOT working as expected:
# it worked on the dateline tiles, but not on the prime meridian tiles.
#
# Try an intersection with the boreal tiles first (boreal tiles are reprojected to asset_gdf's CRS).
asset_gdf_intersect = gpd.overlay(asset_gdf, boreal_tiles.to_crs(asset_gdf.crs), how='intersection')
asset_gdf_intersect.boundary.explore()
# Disabled dateline-split attempt, kept for reference:
# asset_gdf_updated = do_gee_download_by_subtile.split_polygons_by_dateline_poly(asset_gdf_intersect.head(69).to_crs(boreal_tiles.crs), tile_num_col=TILE_FIELD_NAME).to_crs(4326)
# m = asset_gdf_updated.boundary.explore(color='green')
# m

In [189]:
# Overlay on one map: GEE-derived asset footprints (blue) and the local updated agg tiles (red).
m = asset_gdf.boundary.explore(color='blue')
boreal_tiles_agg_new_updated_dateline.boundary.explore(color='red', m=m)

In [209]:
# Give up on the GEE-derived footprints - use the local agg tiles instead.
# Work on a copy: the column added below would otherwise also be written into
# boreal_tiles_agg_new_updated_dateline, because a plain assignment only aliases it.
asset_gdf = boreal_tiles_agg_new_updated_dateline.copy()

# The local agg tiles carry the tile id under the lower-case name; expose it under
# TILE_FIELD_NAME as well so the rest of the notebook can use a single column name.
if TILE_FIELD_NAME not in asset_gdf.columns:
    asset_gdf[TILE_FIELD_NAME] = asset_gdf[TILE_FIELD_NAME.lower()]
    print(asset_gdf.columns)

do
Index(['level_0', 'level_1', 'agg_tile_num', 'isLine', 'geometry',
       'AGG_TILE_NUM'],
      dtype='object')


In [215]:
# NOTE: fishnet_gdf is assigned inside the tile loop in a later cell, so that cell must be run first.
# After the loop finishes it holds the fishnet of the last tile that was processed.
fishnet_gdf.explore()

#### Test on dateline subtiles : 
downloads of subtiles on dateline will fail  
tested with AGG12 tile 9 subtile nums 47(water), 1762(land) (left of line; works) and 48(water), 1763(land) (right of line, fails)  

Since these are just subtile fails, probably ok - given small subtile area

In [219]:
# Output directory for local test runs (created if missing; same effect as `mkdir -p`).
OUTDIR_TMP = '/projects/my-public-bucket/local_output/do_gee_download_by_subtile_TEST4'
os.makedirs(OUTDIR_TMP, exist_ok=True)

CREDS_FN = 's3://maap-ops-workspace/montesano/credentials'

# Set to True to run every subtile download locally instead of collecting DPS parameters.
RUN_LOCALLY = False

# One parameter dict per (agg tile, subtile) combination, to be submitted to DPS.
LIST_in_param_dict = []

# To test on a single agg tile, iterate over e.g. asset_gdf.head(9).tail(1) instead.
for _, tile_row in asset_gdf.iterrows():
    tile_num = tile_row[TILE_FIELD_NAME]

    # Get an agg-tile-specific fishnet to iterate over.
    # Dateline subtiles will still fail regardless of FIX_DATELINE - maybe ok?
    fishnet_gdf = do_gee_download_by_subtile.create_fishnet_new(
        asset_gdf[asset_gdf[TILE_FIELD_NAME] == tile_num],
        TILE_SIZE_M * GRID_SIZE_M,
        TILE_FIELD_NAME,
        FIX_DATELINE=False,
    )

    # To test on a couple of subtiles, iterate over e.g. fishnet_gdf.head(1764).tail(2) instead.
    # Distinct loop names are used so the outer tile row is not overwritten by the subtile row.
    for _, subtile_row in fishnet_gdf.iterrows():
        subtile_num = subtile_row['subtile_num']

        # TODO: check intersection with boreal_tiles here? - don't want subtiles out in the ocean

        if RUN_LOCALLY:
            # Local tests like this
            do_gee_download_by_subtile.do_gee_download_by_subtile(
                SUBTILE_LOC=subtile_num,
                ID_NUM=tile_num,
                ID_COL=TILE_FIELD_NAME,
                ASSET_PATH=ASSET_PATH,
                TILE_SIZE_M=TILE_SIZE_M,
                OUTDIR=OUTDIR_TMP,
                ASSET_GDF_FN=AGG_TILES_FN,
            )
        else:
            # Need params list for DPS
            in_param_dict = {
                'creds_fn': CREDS_FN,
                'subtile_loc': subtile_num,
                'id_num': tile_num,
                'id_col': TILE_FIELD_NAME,
                'tile_size_m': TILE_SIZE_M,
                'asset_path': ASSET_PATH,
            }

            LIST_in_param_dict.append(in_param_dict)

# Number of DPS jobs that would be submitted
len(LIST_in_param_dict)

240898

## Multiprocess DPS submission
Might be the best way of doing this