In [None]:
import pickle
import time

# Download DW labels using Earth Engine Python API

This notebook demonstrates how to download the dynamic world (DW) labels from the Google Earth Engine (GEE) to Google Drive.

**important!!** \
It seems that unfortunately Google Colab notebook (like [this](https://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/ee-api-colab-setup.ipynb)) allows read-only operations and does **NOT** allow any writes to, say, Google Drive. 
So, we cannot download anything through Google Colab. Hence, we must choose other ways than using Google Colab.\
Here, we run this notebook in a local environment. You may want to follow [this guide](https://developers.google.com/earth-engine/guides/python_install#install-options) to install the Python API and get authenticated.
Following the instructions eventually leads you to [this page](https://cloud.google.com/sdk/auth_success). Now you're all set!

### Import and initialize the API

After getting authenticated and launching this notebook, run the following cell to import the API into your session and initialize the library.

In [None]:
# Import the Earth Engine Python API into this session.
import ee
# Initialize the library; this notebook expects authentication to have been completed beforehand.
ee.Initialize()

## Test the API

Test the API by printing the elevation of Mount Everest.

In [None]:
# Sanity check for the API: look up the elevation of Mount Everest.
dem = ee.Image('USGS/SRTMGL1_003')
xy = ee.Geometry.Point([86.9250, 27.9881])

# Sample the image at the point (30 is passed as the second positional
# argument of `sample`), keep the first sampled feature and fetch its
# 'elevation' property from the server.
everest_samples = dem.sample(xy, 30)
everest_feature = everest_samples.first()
elev = everest_feature.get('elevation').getInfo()

print('Mount Everest elevation (m):', elev)

Mount Everest elevation (m): 8729


## Download DW labels

### Specify the folder that we store DW labels in

Downloaded DW labels will be stored in the Google Drive of the Google (Gmail) account that you use for GEE.

Note: \
(a) if the folder name exists at any level, the output is written to it, \
(b) if duplicate folder names exist, output is written to the most recently modified folder, \
(c) if the folder name does not exist, a new folder will be created at the root, and \
(d) folder names with separators (e.g. 'path/to/file') are interpreted as literal strings, not system paths. Defaults to Drive root.

In [None]:
# Name of the Google Drive folder that receives the exported DW files
# (handed to the export function as `dir_name`).
dir_name = 'dw_probs_mean_rounded'

### Read the coordinates of NLCD labels

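Read the file `coors.txt`, which contains the list `coors` with the coordinates of all NLCD labels. Despite its `.txt` extension, the file is a pickled Python object and is loaded with `pickle`.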

In [None]:
# Load the pickled list of NLCD tile records.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# a coors.txt that you created yourself or otherwise trust.
# The `with` block closes the file handle as soon as loading is done.
with open("./coors.txt", "rb") as f:
    coors = pickle.load(f)
len(coors)

2250

The structure of `coors` is as follows.

In [None]:
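# Example of the structure of `coors` (uncomment to experiment with a small list).
#
# Each entry is [dw_name, xdim, ydim, dw_xdim, dw_ydim, coor].
# `coor` is the list of four numbers handed to ee.Geometry.Rectangle
# (apparently [west, south, east, north] in degrees -- confirm against your data).

# coors = [
#   ['3137',        # dw_name
#    3880,          # xdim
#    3880,          # ydim
#    388,           # dw_xdim
#    388,           # dw_ydim
#    [              # coor
#      -76.89980600000001, 39.58152199999998, -76.8548903, 39.616130799999986
#    ]
#   ],
#   ['3716',
#    3880,
#    3880,
#    388,
#    388,
#    [
#      -76.45056472222223, 38.218333333333334, -76.40557333333334, 38.25361138888889
#    ]
#   ],
#   ...
# ]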

### Parameter settings

We specify the parameters.

In [None]:
# Earth Engine ImageCollection ID of the Dynamic World dataset.
snippet = 'GOOGLE/DYNAMICWORLD/V1'

# Band holding the discrete class label.
labelBand = 'label'

# Per-class probability bands, one band per class.
probabilityBands = [
    'water',
    'trees',
    'grass',
    'flooded_vegetation',
    'crops',
    'shrub_and_scrub',
    'built',
    'bare',
    'snow_and_ice',
]

# Class names: same entries and order as the probability bands (separate list object).
CLASS_NAMES = list(probabilityBands)

# Hex colours, one entry per class index (0-8).
VIS_PALETTE = [
    '419BDF',
    '397D49',
    '1BC428',
    '7A87C6',
    '1BC428',
    '1BC428',
    'C4281B',
    '1BC428',
    '000000',
]

# The two years to compare.
year_bef, year_aft = '2015', '2017'

# We select images from the summer season.
# Note that DW labels became available from 2015-06-23.
from_date = '06-23'  # inclusive
to_date = '09-30'    # exclusive, i.e. images up to 09-29 are included
                     # (the author notes this was not noticed when the export was run)

# Choose band(s) to be downloaded:
#   band = labelBand          -> the discrete label
#   band = probabilityBands   -> the per-class probabilities
band = probabilityBands

# Set the reducer that collapses the date-filtered image collection into one image.
# Keep it in sync with `band`: labelBand -> mode(), probabilityBands -> mean()
# reducer = ee.Reducer.mode()
reducer = ee.Reducer.mean()

In [None]:
# Build the full date strings (year + '-' + month-day) for the "before" year ...
from_date_bef = '-'.join((year_bef, from_date))
to_date_bef = '-'.join((year_bef, to_date))

# ... and for the "after" year.
from_date_aft = '-'.join((year_aft, from_date))
to_date_aft = '-'.join((year_aft, to_date))

# Periods to export, as (year, start date, end date) tuples.
# Only the "after" year is active; to export both years use instead:
# time_info = [(year_bef, from_date_bef, to_date_bef), (year_aft, from_date_aft, to_date_aft)]
time_info = [(year_aft, from_date_aft, to_date_aft)]

print('Before: {} -> {}'.format(from_date_bef, to_date_bef))
print('After : {} -> {}'.format(from_date_aft, to_date_aft))

Before: 2015-06-23 -> 2015-09-30
After : 2017-06-23 -> 2017-09-30


### Functions for data-loading and exporting

We define two functions: one for data selection and preprocessing on GEE, and another for exporting from GEE.

In [None]:
def dataloder(coor, snippet, band, from_date, to_date, palette, reducer):
    """Build the reduced, clipped and 8-bit image for one rectangle.

    Parameters
    ----------
    coor : rectangle coordinates handed to ``ee.Geometry.Rectangle``.
    snippet : collection ID handed to ``ee.ImageCollection``.
    band : band name(s) handed to ``select``.
    from_date, to_date : bounds handed to ``ee.Filter.date``.
    palette : not used at present; only the commented-out ``visualize``
        step refers to it.
    reducer : reducer handed to ``reduce`` on the filtered collection.

    Returns
    -------
    tuple
        ``(image, geometry)``: the processed image and the rectangle
        geometry built from ``coor``.
    """
    geometry = ee.Geometry.Rectangle(coor)
    region_filter = ee.Filter.geometry(geometry)
    period_filter = ee.Filter.date(from_date, to_date)

    # Restrict the collection to the rectangle and the period, keep the
    # requested band(s) and collapse the remaining images with the reducer.
    collection = ee.ImageCollection(snippet)
    collection = collection.filter(region_filter).filter(period_filter)
    composite = collection.select(band).reduce(reducer)
    # composite = composite.visualize(min=0, max=8, palette=palette)   # to be colorful RGB images

    # Cut to the rectangle and mask with the image's own values.
    masked = composite.clip(geometry).selfMask()

    # Scale by 200 and cast to unsigned 8-bit.
    # NOTE(review): selfMask and the x200 scaling are applied regardless of
    # `band` -- confirm they are intended when the label band is selected.
    dataset = masked.multiply(200).uint8()

    return dataset, geometry

In [None]:
def save2gd(image, geometry, dir_name, file_name, im_dim):
    """Submit an Earth Engine batch task that exports ``image`` to Google Drive.

    Parameters
    ----------
    image : image to export.
    geometry : export region.
    dir_name : name of the Google Drive folder to write into.
    file_name : used as the task description.
    im_dim : output dimensions, e.g. the string ``'388x388'``.

    Returns
    -------
    The task object returned by ``ee.batch.Export.image.toDrive``, already
    started, so that the caller can inspect its status later (for example
    to find tasks that ended up ``FAILED``). Callers that ignore the return
    value keep working as before.
    """
    # "crsTransform", "scale", and "dimensions" are mutually exclusive; this
    # function uses "dimensions". To export at a fixed scale instead, take an
    # `im_scale` argument and pass `scale=im_scale` in place of `dimensions`.
    task = ee.batch.Export.image.toDrive(
        image=image,
        description=file_name,
        folder=dir_name,
        dimensions=im_dim,
        region=geometry,
        # EPSG:3857 is Web Mercator (a projected CRS); EPSG:4326 is the
        # geographic latitude/longitude CRS.
        # NOTE(review): the earlier comment here described the two codes the
        # other way round and stated that EPSG:3857 matches NLCD -- verify the
        # CRS of the NLCD rasters before relying on pixel alignment.
        # You can check the CRS of a file with gdal:
        #   ds = gdal.Open('your_image_path', gdal.GA_ReadOnly)
        #   ds.GetProjection()
        crs='EPSG:3857',
    )

    # submit the task to GEE
    task.start()
    return task

### Run above functions for every coordinate of NLCD

The outputs (downloaded DW images) are in **tif** format. Considering that DW labels have a 10 m resolution, the outputs are resized as follows to reduce the data size:

- If `xdim` (`ydim`), the width (height) of the corresponding NLCD label, is multiples of ten, then the width (height) of the output will be `dw_xdim`=`xdim`/10 (`dw_ydim`=`ydim`/10). \
- If not, then the width (height) will be `dw_xdim`=`xdim` (`dw_ydim`=`ydim`).

So, please resize when we use them for training. We can confirm `xdim`, `ydim`, `dw_xdim` and `dw_ydim` by looking into `coors`.

Note that since there is no guarantee that the 10 m pixels of the DW labels and the 30 m pixels of NLCD are consistent with each other, the outputs here are approximate labels.

**Output format (label band)** \
Each element of the downloaded labels is one of the following:

0:   	water \
1:   	trees \
2:   	grass \
3:   	flooded_vegetation \
4:   	crops \
5:   	shrub_and_scrub \
6:   	built \
7:   	bare \
8:   	snow_and_ice

**Output format (probability bands)** \
Each probability band is multiplied by 200 and converted (from 64-bit) to the `uint8` type to reduce both the export time and the data size. This means that the probabilities are rounded to 0.5% precision.
So please divide by 200 when you use the probabilities.
It is recommended to further normalize them so that they sum to one.

**important!!** \
Unfortunately, some DW labels are not available for 2015 because the corresponding Sentinel images do not exist. In that case, they are not downloaded (the `state` of those tasks will be `FAILED`).

In [None]:
# Submit one export task per (tile, period) while keeping the number of
# unfinished tasks below the Earth Engine queue limit.
#
# `num_task` counts the submissions since the last queue check. It starts above
# the first threshold so that the queue is inspected before anything is submitted.
# `first_check_flg` is 0 until that first inspection has happened, 1 afterwards.
num_task = 3000
first_check_flg = 0
for i, (dw_name, xdim, ydim, dw_xdim, dw_ydim, coor) in enumerate(coors):

    # To avoid the following error message:
    # "Too many tasks already in the queue (3000). Please wait for some of them to complete."
    #
    # The wait loop below ends with at most 2799 unfinished tasks, so the next
    # check has to happen once 200 tasks have been submitted (`>=`, not `>`):
    # with two periods per tile `num_task` grows in steps of 2, and a strict
    # `> 200` would allow 2799 + 202 = 3001 queued tasks.
    if (first_check_flg == 0 and num_task > 2990) or (first_check_flg == 1 and num_task >= 200):
        num_task = 0
        first_check_flg = 1
        while True:
            tl = ee.data.getTaskList()
            tl_incompleted = [task['state'] for task in tl
                              if task['state'] in ('READY', 'RUNNING')]
            if len(tl_incompleted) < 2800:
                break
            # Report the count that was just measured, then wait before polling again.
            print(f'{len(tl_incompleted)} tasks still left. Waiting until some tasks finish...')
            time.sleep(60)  # Wait for 1 min

    # iteratively exporting
    print(f'for {i}th-image')
    im_dim = str(dw_xdim) + 'x' + str(dw_ydim)
    # The loop variables are deliberately NOT called `from_date` / `to_date`:
    # those names hold the month-day settings of the parameter cell, and
    # overwriting them here would corrupt the date strings if that cell's
    # dependants are re-run afterwards.
    for (year, period_start, period_end) in time_info:
        dataset, geometry = dataloder(coor=coor,
                                      snippet=snippet,
                                      band=band,
                                      from_date=period_start,
                                      to_date=period_end,
                                      palette=VIS_PALETTE,
                                      reducer=reducer,
                                     )
        save2gd(image=dataset, geometry=geometry, dir_name=dir_name,
                file_name=dw_name + '_' + year,
#                 file_name=str(i) + '_' + dw_name + '_' + year,
                im_dim=im_dim)
#                 im_scale=10)
        num_task += 1
        print(f'Submitted exporting task for {year}')
    print()

for 0th-image
Submitted exporting task for 2017

for 1th-image
Submitted exporting task for 2017

for 2th-image
Submitted exporting task for 2017

for 3th-image
Submitted exporting task for 2017

for 4th-image
Submitted exporting task for 2017

for 5th-image
Submitted exporting task for 2017

for 6th-image
Submitted exporting task for 2017

for 7th-image
Submitted exporting task for 2017

for 8th-image
Submitted exporting task for 2017

for 9th-image
Submitted exporting task for 2017

for 10th-image
Submitted exporting task for 2017

for 11th-image
Submitted exporting task for 2017

for 12th-image
Submitted exporting task for 2017

for 13th-image
Submitted exporting task for 2017

for 14th-image
Submitted exporting task for 2017

for 15th-image
Submitted exporting task for 2017

for 16th-image
Submitted exporting task for 2017

for 17th-image
Submitted exporting task for 2017

for 18th-image
Submitted exporting task for 2017

for 19th-image
Submitted exporting task for 2017

for 20th-i