In [1]:
# Cloud Mask 2.0 

import ee
import yaml
import time
import os
import json
from argparse import ArgumentParser
from utils import clipToROI, exportImageCollectionToGCS, exportImageToGCS, sentinel2CloudScore, calcCloudCoverage
from utils import GEETaskManager

from gevent.fileobject import FileObjectThread

# Polygon Import from Misha ROI List

import pandas as pd

# Functions for Active Run of Cloud Mask 2.0 

from download_sen12 import *

  with loop.timer(seconds, ref=ref) as t:


In [None]:
config_file = "config.yml"

In [None]:
stream = open(config_file, 'r') 

In [None]:
# yaml.load() without an explicit Loader is deprecated (it emits a warning on
# PyYAML 5.x and is a TypeError on PyYAML >= 6); safe_load parses plain
# mappings/lists/scalars without constructing arbitrary Python objects.
config = yaml.safe_load(stream)

In [None]:
config["data_list"][0]

In [None]:
config

# Initialize EE

In [None]:
ee.Initialize()

# Creating New Feature Collection To Use with CM_v2

In [None]:
def import_aois(csv_loc):
    """Read the polygon coordinates listed in a label CSV.

    Parameters
    ----------
    csv_loc : path or buffer accepted by ``pd.read_csv``
        The CSV must contain the columns "center-lat", "center-long",
        "polygon" and "Labels combined". Every "polygon" cell is a JSON
        document with a "coordinates" entry.

    Returns
    -------
    list
        The "coordinates" value of each row, in file order.
    """
    wanted_columns = ["center-lat", "center-long", "polygon", "Labels combined"]
    # Selecting the columns up front keeps the KeyError for a malformed CSV.
    labels = pd.read_csv(csv_loc)[wanted_columns]
    return [json.loads(raw_polygon)["coordinates"] for raw_polygon in labels["polygon"]]

polygons = import_aois("D:/canopy_data/csvs/polygons_101320.csv")

# Turn the first three polygons into ee.Features whose "name" property is a
# 1-based running id, then bundle them into one FeatureCollection.
feature_id = 0
features = []
for feature_id, poly in enumerate(polygons[0:3], start=1):
    polys = ee.Geometry.Polygon(poly)
    feature = ee.Feature(polys, {"name": feature_id})
    features.append(feature)

fc = ee.FeatureCollection(features)

In [None]:
fc

In [None]:
# Build the task manager from the config values and attach the monitor callback.
task_queue = GEETaskManager(
    n_workers=config['max_tasks'],
    max_retry=config['max_retry'],
    wake_on_task=True,
    log_file=config['log_file'],
    process_timeout=config['task_timeout'],
)
task_queue.register_monitor(monitor_tasks)

# Hand a previously written task log to the queue when one exists on disk.
if os.path.exists('task_log.json'):
    task_log = load_task_log(filename='task_log.json')
    task_queue.set_task_log(task_log)

# Only the first data_list entry is processed here, once for every sensor
# index it references, using the hand-built feature collection `fc`.
for data_list in config['data_list'][0:1]:
    for sensor_idx in data_list['sensors']:
        sensor = config['sensors'][sensor_idx]
        print(sensor)
        tasks = process_datasource(
            task_queue,
            data_list,
            sensor,
            config['export_to'],
            config['export_dest'],
            feature_list=fc,
        )

print("Waiting for completion...")
task_queue.wait_till_done()

In [None]:
d = {'a': 1, 'b': 2, 'c': 3}

d.keys()

In [None]:
list(d.keys())[0]

In [None]:
list(d.values())[0]

In [None]:
d.items()

In [None]:
e = {'d': d, 'e': 0}

e

In [None]:
list(list(e.values())[0].keys())[0]

In [None]:
# Build the task manager from the config values and attach the monitor callback.
task_queue = GEETaskManager(
    n_workers=config['max_tasks'],
    max_retry=config['max_retry'],
    wake_on_task=True,
    log_file=config['log_file'],
    process_timeout=config['task_timeout'],
)
task_queue.register_monitor(monitor_tasks)

# Hand a previously written task log to the queue when one exists on disk.
if os.path.exists('task_log.json'):
    task_log = load_task_log(filename='task_log.json')
    task_queue.set_task_log(task_log)

# Every data_list entry is processed, once for every sensor index it references.
# No feature_list is passed, so process_datasource runs with its default.
for data_list in config['data_list']:
    for sensor_idx in data_list['sensors']:
        sensor = config['sensors'][sensor_idx]
        tasks = process_datasource(
            task_queue,
            data_list,
            sensor,
            config['export_to'],
            config['export_dest'],
        )

print("Waiting for completion...")
task_queue.wait_till_done()

In [None]:
!pwd

## makeFilterList

In [None]:
def makeFilterList(sensor):
    """Translate a sensor's filter specs into lists of ee.Filter objects.

    Each entry of ``sensor['filters_before']`` / ``sensor['filters_after']``
    is a dict shaped like ``{property: {operator: value}}``, for example
    ``{'CLOUDY_PERCENTAGE': {'lte': 10}}``, which is turned into
    ``ee.Filter.lte('CLOUDY_PERCENTAGE', 10)``.

    Returns
    -------
    tuple
        ``(filters_before, filters_after)``. Each element is a list of
        filters, or None when the sensor lacks the corresponding key.
    """
    def _to_filters(specs):
        built = []
        for spec in specs:
            # Only the first property, and the first operator below it, is read.
            prop, condition = list(spec.items())[0]
            op_name, operand = list(condition.items())[0]
            built.append(getattr(ee.Filter, op_name)(prop, operand))
        return built

    before = _to_filters(sensor['filters_before']) if 'filters_before' in sensor else None
    after = _to_filters(sensor['filters_after']) if 'filters_after' in sensor else None
    return before, after

In [None]:
type(config)

In [None]:
config.keys()

In [None]:
config

In [None]:
config['sensors']

In [None]:
sensor = config['sensors'][0]
sensor

In [None]:
# Default values
filters_before = None
filters_after = None

In [None]:
# Sub function
def _build_filters(filter_list):
    # filter_list is a list of dictionaries. Includes the attributes for filtering an image collection
    filters = []
    # for each dict in filter_list
    # example: {'CLOUDY_PERCENTAGE': {'lte': 10}}
    for f in filter_list:
        # key is the first key of the dict -- the feature you're trying to filter by
        # example: 'CLOUDY_PERCENTAGE'
        key = list(f.keys())[0]
        # op is the key of the nested dictionary
        # example: 'lte'
        op = list(list(f.values())[0].keys())[0]
        # val is the value of the nested dictionary
        # example: 10
        val = list(list(f.values())[0].values())[0]
        # Make an ee.Filter object that matches the input filter dict
        # example: ee.Filter.lte('CLOUDY_PERCENTAGE', 10)
        # This will then get applied to an image_collection object
        filters.append(getattr(ee.Filter, op)(key, val))

In [None]:
help(ee.Filter)

In [None]:
# Because of the sorting (probably), you may want to apply filters specifically
# before or after creating the image_collection object.
# So we have separate filter lists for both before and after.
if 'filters_before' in sensor:
    filters_before = _build_filters(sensor['filters_before'])

if 'filters_after' in sensor:
    filters_after = _build_filters(sensor['filters_after'])
    
# So at the end, we build a list of ee.Filter objects based off of the sensor
# values for its 'filters_before' and 'filters_after' keys. If the sensor
# lacks one or both such keys, the filters_before and filters_after retain
# their default None value (i.e. no filters get applied).

In [None]:
ee.Initialize()

In [None]:
makeFilterList(sensor)

## makeImageCollection

In [None]:
def makeImageCollection(sensor, roi, start_date, end_date, modifiers=[]):
    """Query, clip, filter and band-select the images of one sensor over an ROI.

    Parameters
    ----------
    sensor : dict
        Sensor config entry. 'name' is the collection id and 'bands' the bands
        to return; optional 'filters_before' / 'filters_after' are read by
        makeFilterList.
    roi : geometry passed to ``filterBounds`` and ``ee.Geometry``
    start_date, end_date : values accepted by ``ee.Date``
    modifiers : list of callables, optional
        Functions mapped over the collection, in order. The list is only read,
        never mutated.

    Returns
    -------
    The filtered collection restricted to ``sensor['bands']``.
    """
    # Make the filters based off of the sensor config
    filters_before, filters_after = makeFilterList(sensor)

    # Make an image collection. Take the name from the sensor.
    # Filter by date based off of start_date and end_date.
    # Filter bounds based off of the ROI.
    # The chain is parenthesised (not backslash-continued) because a comment
    # line between backslash continuations is a SyntaxError.
    collection = (
        ee.ImageCollection(sensor['name'])
        .filterDate(ee.Date(start_date), ee.Date(end_date))
        .filterBounds(roi)
        # The lambda is needed: map() hands over one image at a time, while
        # clipToROI also takes the geometry as a second argument.
        .map(lambda x: clipToROI(x, ee.Geometry(roi)))
    )

    # If there are filters_before, apply them. An empty list is skipped, the
    # same way filters_after is handled below.
    if filters_before:
        collection = collection.filter(filters_before)

    # If there are additional functions you want to apply, put them in the
    # "modifiers" list and they will be applied in turn using the 'map' method
    for m in (modifiers or []):
        collection = collection.map(m)

    # If there are filters_after, apply them
    if filters_after:
        collection = collection.filter(filters_after)

    # Band selection is done last so that custom (pre-)processing in
    # "modifiers" can still use bands that are not exported.
    return collection.select(sensor['bands'])

## process_datasource

In [None]:
def process_datasource(task_queue, source, sensor, export_to, export_dest, feature_list = None):
    """Queue one export_single_feature task for every feature of a data source.

    Parameters
    ----------
    task_queue : object with ``add_task(task_params, blocking=True)``
    source : dict
        data_list entry of the config. Reads 'sort_by', 'geometry', 'size'
        (point geometry only), 'name', 'resolution', 'start_date',
        'end_date', and 'features_src' when ``feature_list`` is None.
    sensor : dict
        Sensor config entry; the optional 'prefix' goes into the output path.
    export_to : unused by this function; kept for interface compatibility.
    export_dest : bucket name forwarded as export_params['bucket'].
    feature_list : ee.FeatureCollection, optional
        Features to export. When omitted, the collection is loaded from
        ``source['features_src']``.

    Returns
    -------
    list of str
        The ids of the queued tasks, in feature order.
    """
    if feature_list is None:
        # Default used to crash on `.sort`; fall back to the configured source.
        feature_list = ee.FeatureCollection(source['features_src'])

    feature_list = feature_list.sort(source['sort_by']).toList(feature_list.size())
    n_features = feature_list.size().getInfo()

    print("{} features have been loaded".format(n_features))

    # Normalise name/prefix to lists once, on local copies, so the caller's
    # config dicts are left untouched. A sensor without 'prefix' is allowed.
    name_parts = [source['name']] if isinstance(source['name'], str) else list(source['name'])
    if 'prefix' in sensor:
        prefix = sensor['prefix']
        prefix_parts = [prefix] if isinstance(prefix, str) else list(prefix)
        filename_parts = prefix_parts + name_parts
    else:
        filename_parts = name_parts

    task_list = []

    # ee lists are 0-indexed: start at 0 so the first feature is not skipped.
    for i in range(n_features):
        feature_point = ee.Feature( feature_list.get(i) )

        if source['geometry'] == "point":
            # Grow the point into a bounding box of the configured size.
            feature_point = feature_point.buffer(source['size']).bounds()

        roi = feature_point.geometry()
        roi = roi.coordinates().getInfo()

        filename = "_".join(name_parts + [str(i)])
        dest_path = "/".join(filename_parts + [filename])

        export_params = {
            'bucket': export_dest,
            'resolution': source['resolution'],
            'filename': filename,
            'dest_path': dest_path
        }

        task_params = {
            'action': export_single_feature,
            'id': "_".join(filename_parts + [str(i)]), # This must be unique per task, to allow to track retries
            'kwargs': {
                'roi': roi,
                'export_params': export_params,
                'sensor': sensor,
                'date_range': {'start_date': source['start_date'], 'end_date': source['end_date']}
            }
        }

        task_queue.add_task(task_params, blocking=True)
        task_list.append(task_params['id'])

    return task_list

In [None]:
def process_datasource(task_queue, source, sensor, export_to, export_dest, feature_list = None)
### NOTE: We're going to remove the task_queue probably

In [None]:
# the feature_list is an ee.FeatureCollection
# This sorts the feature_list by the parameter in source['sort_by']
feature_list = feature_list.sort(source['sort_by']).toList(feature_list.size())
# get the number of features in the feature_list
n_features = feature_list.size().getInfo()

In [None]:
fc

In [None]:
fc.toList(fc.size())

In [None]:
help(fc.toList)

In [None]:
fc.size().getInfo()

In [None]:
type(fc.size())

In [None]:
help(ee.ee_number.Number)

In [None]:
task_list = []
# This variable is not used so I don't know why it's defined

In [None]:
for i in range(1, n_features):
    feature_point = ee.Feature( feature_list.get(i) )

In [None]:
feature_list = fc.toList(fc.size())

In [None]:
type(feature_list)

In [None]:
help(feature_list.get)

In [None]:
feature_list.get(0).getInfo()

In [None]:
feature_list.get(1).getInfo()

In [None]:
## CHANGE:

for i in range(0, n_features):
    # Loop through each feature. Pull out the feature--
    # need to put it inside an "ee.Feature" because otherwise
    # it's a "ComputedObject."
    feature_point = ee.Feature( feature_list.get(i) )

In [None]:
feature_point = ee.Feature( feature_list.get(0) )

In [None]:
type(feature_list.get(0))

In [None]:
if source['geometry'] == "point":
    # If the feature is a point, then create a bounding box based off
    # of the "size" attribute from 'source', using the defined
    # feature as the centroid.
    feature_point = feature_point.buffer(source['size']).bounds()

In [None]:
help(feature_point.buffer)

In [None]:
help(feature_point.bounds)

In [None]:
# Get the coordinates of feature_point as the ROI
roi = feature_point.geometry()
roi = roi.coordinates().getInfo()

In [None]:
## if type(source['name']) == str
if isinstance(source['name'], str):
    # make it into a list, so we don't error out
    source['name'] = [source['name']]
    
# same as above
if isinstance(sensor['prefix'], str):
    sensor['prefix'] = [sensor['prefix']]

In [None]:
# make a list 'filename_parts', with all the prefixes
# first (if there are prefixes), then all of the names.
# Keep in mind that at this point, we're working on a single source
# and a single sensor, so really there's just one prefix and one name;
# however, these each might be divided into parts and put into a list
# so that we can then join all the parts together later.
if 'prefix' in sensor:
    filename_parts = sensor['prefix'] + source['name']
else:
    filename_parts = source['name']

In [None]:
# Filename is the source name, underscore, then an integer
# (integer depends on which feature we're working on)
filename = "_".join(source['name'] + [str(i)])
# dest_path is the filename parts joined by forward slashes, then the filename
dest_path = "/".join(filename_parts + [filename])

In [None]:
# Define export parameters
export_params = {
    # export bucket is one of the arguments to the overall function
    'bucket': export_dest,
    # resolution comes from the source
    'resolution': source['resolution'],
    # filename and dest_path defined above
    'filename': filename,
    'dest_path': dest_path
}

In [None]:
# Define task parameters for the async stuff
task_params = {
    # Function to run: export_single_feature
    'action': export_single_feature,
    # ID for the async stuff to track each task
    'id': "_".join(filename_parts + [str(i)]), # This must be unique per task, to allow to track retries
    'kwargs': {
        # kwargs come from the variables defined in-function
        'roi': roi,
        'export_params': export_params,
        'sensor': sensor,
        'date_range': {'start_date': source['start_date'], 'end_date': source['end_date']}
    }
}

In [None]:
# async line
task_queue.add_task(task_params, blocking=True)

In [None]:
l1 = [1, 2, 3]
l2 = [4, 5, 6]
l1 + l2

In [None]:
config['sensors']

In [None]:
sensor = config['sensors'][0]

In [None]:
config.keys()

In [None]:
config['data_list']

In [None]:
source = config['data_list'][0]

In [None]:
source['name']

## export_single_feature

In [None]:
def export_single_feature(roi=None, sensor=None, date_range=None, export_params=None):
    """Mosaic one ROI's images for a sensor and pass the result to exportImageToGCS.

    Parameters
    ----------
    roi : nested coordinate list; ``roi[0]`` is used to build the ee polygon.
    sensor : dict
        Sensor config entry. A 'type' of "opt" (case-insensitive) enables the
        cloud-score and cloud-coverage modifiers.
    date_range : dict with 'start_date' and 'end_date'.
    export_params : dict
        Copied, extended with 'img' and 'roi', and expanded as keyword
        arguments to exportImageToGCS.

    Returns
    -------
    Whatever exportImageToGCS returns.
    """
    is_optical = sensor['type'].lower() == "opt"
    modifiers = [sentinel2CloudScore, calcCloudCoverage] if is_optical else None

    roi_ee = ee.Geometry.Polygon(roi[0])
    image_collection = makeImageCollection(
        sensor,
        roi_ee,
        date_range['start_date'],
        date_range['end_date'],
        modifiers=modifiers,
    )
    img = ee.Image(image_collection.mosaic())

    # Work on a copy so the caller's export_params dict is not modified.
    new_params = export_params.copy()
    new_params.update(img=img, roi=roi)

    return exportImageToGCS(**new_params)

In [None]:
# roi, sensor, date_range, export_params
export_single_feature(roi=None, sensor=None, date_range=None, export_params=None)

In [None]:
# default modifiers value
modifiers = None
# if the sensor type is "opt" (optical)
if sensor['type'].lower() == "opt":
    #print(sensor['type'])
    # then the modifiers is the following two functions from utils.
    # the only reason to run these functions is if you're getting
    # optical products (i.e. rasters)
    modifiers = [sentinel2CloudScore, calcCloudCoverage]

In [None]:
# Getting the RoI as an EE Geometry (Polygon) object
roi_ee = ee.Geometry.Polygon(roi[0])

# run the makeImageCollection function that is pulled from utils
image_collection = makeImageCollection(sensor, roi_ee, date_range['start_date'], date_range['end_date'], modifiers=modifiers)

# get a single image by mosaicing the image collection.
# this will naturally do a pixel replacement (i.e. we're flattening the products)
img = ee.Image(image_collection.mosaic())

In [None]:
# copy the export_params, then add 'img' and 'roi' key/value pairs
new_params = export_params.copy()
new_params['img'] = img
new_params['roi'] = roi

In [None]:
# run exportImageToGCS (pulled from utils) on the new_params.
# Inside export_single_feature this value is returned; at cell level a bare
# `return` is a SyntaxError, so the call is left as the cell's last expression
# and its result is shown as the cell output.
exportImageToGCS(**new_params)

# TESTING (ZHENYA START HERE)

In [1]:
import ee
import yaml
import time
import os
import json
import pandas as pd
from utils import exportImageToGDrive,exportImageToGCS
from download_sen12 import *

ee.Initialize()

  with loop.timer(seconds, ref=ref) as t:


In [None]:
# def process_datasource(source, sensor, export_folder, feature_collection = None):
#     # feature_list = ee.FeatureCollection(source['features_src'])
#     feature_list = feature_collection.sort(source['sort_by']).toList(feature_collection.size())
#     n_features = feature_list.size().getInfo()

#     print("{} features have been loaded".format(n_features))

#     for i in range(0, n_features):
#         feature_point = ee.Feature( feature_list.get(i) )

# #         if source['geometry'] == "point":
# #             feature_point = feature_point.buffer(source['size']).bounds()

#         roi = feature_point.geometry()
#         roi = roi.coordinates().getInfo()

#         if isinstance(source['name'], str):
#             source['name'] = [source['name']]

#         if isinstance(sensor['prefix'], str):
#             sensor['prefix'] = [sensor['prefix']]

#         if 'prefix' in sensor:
#             filename_parts = sensor['prefix'] + source['name']
#         else:
#             filename_parts = source['name']
            
#         time_stamp = "_".join(time.ctime().split(" ")[1:])

#         filename = "_".join(source['name'] + [str(i)])
        
#         dest_path = "/".join(filename_parts + [filename])
#         print(dest_path)

#         export_params = {
# #             'drive_folder': export_folder,
#             'bucket' : export_folder,
#             'resolution': source['resolution'],
#             'filename': filename,
#             'dest_path': dest_path
#         }
        
        
        
# #         return export_single_feature(roi=roi, export_params=export_params,
# #                                      sensor=sensor,
# #                                      date_range={'start_date': source['start_date'],
# #                                                  'end_date': source['end_date']})
    
    
# def export_single_feature(roi=None, sensor=None, date_range=None, export_params=None):
#     modifiers = None
#     if sensor['type'].lower() == "opt":
#         #print(sensor['type'])
#         modifiers = [sentinel2CloudScore, calcCloudCoverage]

#     roi_ee = ee.Geometry.Polygon(roi[0])
#     image_collection = makeImageCollection(sensor, roi_ee, date_range['start_date'], date_range['end_date'], modifiers=modifiers)
#     img = image_collection.mosaic().clip(roi_ee)
#     print(img.getInfo())
    

#     new_params = export_params.copy()
#     print(new_params)
#     new_params['img'] = img
#     new_params['roi'] = roi
    
    
#     return exportImageToGCS(**new_params)


# #     return exportImageToGDrive(**new_params)

# def load_config(config_file):
#     stream = open(config_file, 'r') 
#     return yaml.load(stream)

In [2]:
config_dict = load_config('config.yml')
source = config_dict['data_list'][0]
sensor = config_dict['sensors'][0]
# export_folder = config_dict['drive_folder']
export_folder = config_dict['bucket']

print(export_folder)

project-canopy-temp-2


  return yaml.load(stream)


In [3]:
print(source['name'])

['full_polygon_test']


In [3]:
def import_aois(csv_loc):
    """Return the polygon coordinates stored in a label CSV.

    The CSV at ``csv_loc`` must provide the columns "center-lat",
    "center-long", "polygon" and "Labels combined". Each "polygon" cell is
    parsed as JSON and its "coordinates" entry is collected.

    Returns a list with one coordinates value per row, in file order.
    """
    required = ["center-lat", "center-long", "polygon", "Labels combined"]
    label_rows = pd.read_csv(csv_loc)
    # Column selection is kept so a CSV lacking any of them fails with KeyError.
    label_rows = label_rows[required]

    return [json.loads(cell)["coordinates"] for cell in label_rows["polygon"]]

### CHANGE BELOW PATH ###
zhenya_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v3/Polygon_List/polygons_101320.csv"
david_path = 'D:/canopy_data/csvs/polygons_101320.csv'
polygons = import_aois(david_path)

# Wrap every polygon as an ee.Feature whose "name" property is a 1-based
# running id, then bundle them all into one FeatureCollection.
feature_id = 0
features = []
for feature_id, poly in enumerate(polygons, start=1):
    polys = ee.Geometry.Polygon(poly)
    feature = ee.Feature(polys, {"name": feature_id})
    features.append(feature)

fc = ee.FeatureCollection(features)

In [4]:
export = process_datasource(source, sensor, export_folder, fc)

101 features have been loaded
processing  1_full_polygon_test_Nov_12_15:04:28_2020
Inject B10
processing  2_full_polygon_test_Nov_12_15:04:29_2020
Inject B10
processing  3_full_polygon_test_Nov_12_15:04:30_2020
Inject B10
processing  4_full_polygon_test_Nov_12_15:04:31_2020
Inject B10
processing  5_full_polygon_test_Nov_12_15:04:32_2020
Inject B10
processing  6_full_polygon_test_Nov_12_15:04:33_2020
Inject B10
processing  7_full_polygon_test_Nov_12_15:04:34_2020
Inject B10
processing  8_full_polygon_test_Nov_12_15:04:35_2020
Inject B10
processing  9_full_polygon_test_Nov_12_15:04:36_2020
Inject B10
processing  10_full_polygon_test_Nov_12_15:04:37_2020
Inject B10
processing  11_full_polygon_test_Nov_12_15:04:38_2020
Inject B10
processing  12_full_polygon_test_Nov_12_15:04:39_2020
Inject B10
processing  13_full_polygon_test_Nov_12_15:04:39_2020
Inject B10
processing  14_full_polygon_test_Nov_12_15:04:40_2020
Inject B10
processing  15_full_polygon_test_Nov_12_15:04:41_2020
Inject B10
proc

  with loop.timer(seconds, ref=ref) as t:


In [6]:
# Poll the export task until it is no longer active. Sleeping between polls
# keeps the loop from sending back-to-back status requests to Earth Engine.
while export.active():
    print(export.status(), end="\r", flush=True)
    time.sleep(5)

# Show the final state once the task has stopped.
print(export.status())

{'state': 'COMPLETED', 'description': 'test_0_Nov_11_14:54:58_2020', 'creation_timestamp_ms': 1605135299822, 'update_timestamp_ms': 1605135922839, 'start_timestamp_ms': 1605135306877, 'task_type': 'EXPORT_IMAGE', 'destination_uris': ['https://console.developers.google.com/storage/browser/project_canopy_temp/S2_CloudFree/test/'], 'attempt': 1, 'id': 'MMOHTBKSVXOZE5EMSJAOKB4N', 'name': 'projects/earthengine-legacy/operations/MMOHTBKSVXOZE5EMSJAOKB4N'}

In [5]:
# Rebuild the FeatureCollection from `polygons`: one ee.Feature per polygon,
# each carrying a 1-based running id in its "name" property.
feature_id = 0
features = []
for feature_id, poly in enumerate(polygons, start=1):
    polys = ee.Geometry.Polygon(poly)
    feature = ee.Feature(polys, {"name": feature_id})
    features.append(feature)

fc = ee.FeatureCollection(features)

In [5]:
exports = process_datasource(source, sensor, export_folder, fc)

11 features have been loaded
Inject B10
cloudFree info: {'type': 'Image', 'bands': [{'id': 'B1', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [11133, 11133], 'origin': [189243, -44528], 'crs': 'EPSG:4326', 'crs_transform': [8.983152841195215e-05, 0, 0, 0, -8.983152841195215e-05, 0]}, {'id': 'B2', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [11133, 11133], 'origin': [189243, -44528], 'crs': 'EPSG:4326', 'crs_transform': [8.983152841195215e-05, 0, 0, 0, -8.983152841195215e-05, 0]}, {'id': 'B3', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [11133, 11133], 'origin': [189243, -44528], 'crs': 'EPSG:4326', 'crs_transform': [8.983152841195215e-05, 0, 0, 0, -8.983152841195215e-05, 0]}, {'id': 'B4', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [11133, 11133], 'origin': [189243, -44528], 'crs': 'EPSG:43

  with loop.timer(seconds, ref=ref) as t:


In [None]:
# export = exports[0]

# Poll the export task until it is no longer active. Sleeping between polls
# keeps the loop from sending back-to-back status requests to Earth Engine.
while export.active():
    print(export.status(), end="\r", flush=True)
    time.sleep(5)

# Show the final state once the task has stopped.
print(export.status())
    
    
    
# any(ele > 10 for ele in test_list)

{'state': 'RUNNING', 'description': 'test_0_Nov_11_15:20:40_2020', 'creation_timestamp_ms': 1605136842168, 'update_timestamp_ms': 1605138764844, 'start_timestamp_ms': 1605136863200, 'task_type': 'EXPORT_IMAGE', 'attempt': 1, 'id': 'YNFRYUQOSVSMPP7RSSZ7OWEP', 'name': 'projects/earthengine-legacy/operations/YNFRYUQOSVSMPP7RSSZ7OWEP'}

### Planning

Flow of our pipeline--

1. Process Datasource: Sort the feature list and go through each feature one at a time.

2. Export Single Feature: put inputs in the right format for makeImageCollection

3. Make Image Collection: Queries images, applies filters and the map functions

4. Export Single Feature (again): Merges collection, then exports it

To generalize:

1. Split feature list

2. For each feature, make the appropriate image collection

3. Mosaic

4. Export

Proposal is to make a separate image collection for each Tile ID.

1. Put a "tile list" in the config file, as well as a single polygon.

2. Go through each tile in the tile list one at a time.

3. Make an image collection containing only products with that tile.

4. Mosaic that image collection.

5. If any parts of the mosaic lie outside the polygon, clip it. (<-- this part needs testing to see precisely what to do)

6. Export the mosaic tile. (Then repeat for each tile)

In [22]:
def process_datasource_tiles(source, sensor, export_folder, tile_list, main_polygon=None, pre_mosaic_sort='CLOUDY_PERCENTAGE'):
    """Run export_single_tile once for every tile id in ``tile_list``.

    Parameters
    ----------
    source : dict
        data_list entry; reads 'start_date', 'end_date', 'name', 'resolution'.
    sensor : dict
        Sensor config entry; 'name' is the collection id and the optional
        'prefix' goes into the output path.
    export_folder : bucket name forwarded as export_params['bucket'].
    tile_list : sequence of tile id strings.
    main_polygon : polygon coordinates, optional
        When given, the collection is restricted to images intersecting it,
        and the resulting ee.Geometry is handed to export_single_tile.
    pre_mosaic_sort : property name forwarded as ``sort_by``.

    Returns
    -------
    list
        One export_single_tile result per tile, in ``tile_list`` order.
    """
    n_tiles = len(tile_list)

    print(f'{n_tiles} tiles have been loaded')

    exports = []

    start_date = source['start_date']
    end_date = source['end_date']

    main_collection = ee.ImageCollection(sensor['name']) \
                        .filterDate(ee.Date(start_date), ee.Date(end_date))

    if main_polygon:
        main_polygon = ee.Geometry.Polygon(main_polygon)
        # Previously referenced the undefined name `main_polygon_ee`.
        main_collection = main_collection.filterBounds(main_polygon)

    # Normalise name/prefix to lists on local copies so the caller's config
    # dicts are not modified.
    name_parts = [source['name']] if isinstance(source['name'], str) else list(source['name'])

    if 'prefix' in sensor:
        prefix = sensor['prefix']
        prefix_parts = [prefix] if isinstance(prefix, str) else list(prefix)
        filename_parts = prefix_parts + name_parts
    else:
        filename_parts = name_parts

    for i, tile in enumerate(tile_list):
        print(f'Processing tile {tile}')

        # split() rather than split(" "): ctime pads single-digit days with two
        # spaces, which would otherwise leave an empty token ("Nov__6_...").
        time_stamp = "_".join(time.ctime().split()[1:])
        filename = "_".join([str(i + 1)] + name_parts + [time_stamp])
        print("processing ",filename)
        dest_path = "/".join(filename_parts + [filename])

        export_params = {
            'bucket': export_folder,
            'resolution': source['resolution'],
            'filename': filename,
            'dest_path': dest_path
        }

        export = export_single_tile(
            tile=tile,
            main_collection=main_collection,
            main_polygon=main_polygon,
            sensor=sensor,
            export_params=export_params,
            sort_by=pre_mosaic_sort
        )

        exports.append(export)

    return exports

In [48]:
def export_single_tile(tile, main_collection, main_polygon=None, sensor=None, export_params=None, sort_by='CLOUDY_PERCENTAGE'):
    """Build the image collection for one tile (mosaic/export steps currently disabled).

    Parameters
    ----------
    tile : tile id forwarded to makeTileCollection.
    main_collection : collection forwarded to makeTileCollection.
    main_polygon : optional geometry; only used by the unreachable clip/export code.
    sensor : dict; 'name' and 'type' select the modifiers mapped over the collection.
    export_params : dict; only used by the unreachable export code.
    sort_by : property name; only used by the unreachable sort.

    Returns
    -------
    As written, the value returned by makeTileCollection.

    NOTE(review): the early ``return tile_collection`` below looks like a
    debugging short-circuit. Everything after it (sort, mosaic, clip,
    reproject, export parameters) never runs. Before removing it, confirm that
    mergeCollection returns an image, since the code after it calls
    ``.clip`` / ``.reproject`` on that result.
    """
    modifiers = []
    # The B10 injection is added only for the surface-reflectance collection.
    if sensor['name'].lower() == "copernicus/s2_sr":
        print('Inject B10')
        modifiers.append(inject_B10)
    # Optical sensors additionally get the cloud/shadow/quality scoring steps.
    if sensor['type'].lower() == "opt":
        #print(sensor['type'])
        modifiers += [sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

    print('Making tile collection')
    tile_collection = makeTileCollection(sensor, tile, main_collection, modifiers=modifiers)
    # NOTE(review): early return -- all statements below are unreachable.
    return tile_collection
    tile_collection = tile_collection.sort(sort_by)

    print('Making mosaic')
    cloudFree = mergeCollection(tile_collection)
    if main_polygon:
        print('Clipping to polygon')
        cloudFree = cloudFree.clip(main_polygon)
    cloudFree = cloudFree.reproject('EPSG:4326', None, 10)

    new_params = export_params.copy()
    new_params['img'] = cloudFree
    new_params['roi'] = main_polygon
    new_params['sensor_name'] = sensor['name'].lower()
    
    # NOTE(review): new_params is built but not used while the export call
    # below stays commented out.
    return cloudFree
    
    #return exportImageToGCS(**new_params)

In [33]:
def makeTileCollection(sensor, tile, main_collection, modifiers=[]):
    """Narrow ``main_collection`` to one tile and apply the sensor's filters and modifiers.

    Parameters
    ----------
    sensor : dict; its optional filter specs are read through makeFilterList.
    tile : string that an image's 'system:index' must contain.
    main_collection : collection to narrow down.
    modifiers : list of callables mapped over the collection, in order.

    Returns
    -------
    The resulting collection (no band selection is applied).
    """
    filters_before, filters_after = makeFilterList(sensor)

    # Keep only the images whose index mentions this tile id.
    tile_images = main_collection.filterMetadata('system:index', 'contains', tile)

    if filters_before is not None:
        tile_images = tile_images.filter(filters_before)

    for modifier in (modifiers or []):
        tile_images = tile_images.map(modifier)

    if filters_after:
        tile_images = tile_images.filter(filters_after)

    return tile_images

In [42]:
def exportImageToGCS(img=None, roi=None, bucket=None, filename=None, dest_path=None, resolution=10, start=True, sensor_name=None):
    """Create (and by default start) a Cloud Storage export task for an image.

    Parameters
    ----------
    img : image to export.
    roi : optional export region; left out of the export call when falsy.
    bucket : destination bucket name.
    filename : used as the task description.
    dest_path : used as the file name prefix inside the bucket.
    resolution : export scale (default 10).
    start : when True, the task is started before it is returned.
    sensor_name : 'copernicus/s2' exports bands B4/B3/B2 and
        'copernicus/s2_sr' exports TCI_R/TCI_G/TCI_B; any other value exports
        the image unchanged.

    Returns
    -------
    The export task object.
    """
    ## same as in the JS version

    print('Exporting image')

    if sensor_name == 'copernicus/s2':
        bands = ['B4', 'B3', 'B2']
    elif sensor_name == 'copernicus/s2_sr':
        bands = ['TCI_R', 'TCI_G', 'TCI_B']
    else:
        bands = None

    if bands is not None:
        img = img.select(bands)

    # Arguments shared by both variants; 'region' is added only with an ROI.
    export_kwargs = dict(
        image=img,
        description=filename,
        scale=resolution,
        fileNamePrefix=dest_path,
        bucket=bucket,
        maxPixels=1e13,
    )
    if roi:
        print('Using ROI')
        export_kwargs['region'] = roi
    else:
        print('Not using ROI')

    export = ee.batch.Export.image.toCloudStorage(**export_kwargs)

    if start:
        export.start()

    return export

In [46]:
def mergeCollection(imgC, keepThresh=5, filterBy='CLOUDY_PERCENTAGE', filterType='less_than', mosaicBy='cloudShadowScore', verbose=True):
    """Merge a collection into one image: best images on top, quality mosaic below.

    Args:
        imgC: image collection to merge.
        keepThresh: threshold compared against the ``filterBy`` property.
        filterBy: metadata property used both to select and to sort the
            "best" images.
        filterType: comparison operator name passed to filterMetadata.
        mosaicBy: band name passed to qualityMosaic.
        verbose: when true (the default) print diagnostic sizes. Each size
            is fetched with a blocking getInfo() request, so pass False to
            avoid those round trips.

    Returns:
        An ee.Image: the mosaic of [quality mosaic, mosaic of best images].
        The previous version had this return commented out and therefore
        always returned None.
    """
    # Select the best images, which are below the cloud free threshold, and sort
    # them in reverse order (worst on top) for mosaicing.
    ## same as the JS version
    best = imgC.filterMetadata(filterBy, filterType, keepThresh).sort(filterBy, False)
    if verbose:
        print('Size of best:', best.size().getInfo())

    filtered = imgC.qualityMosaic(mosaicBy)
    if verbose:
        print('Type of filtered:', type(filtered))

    # Add the quality mosaic to fill in any missing areas of the ROI which aren't
    # covered by good images. It goes first in the list, so the mosaic of the
    # best images is drawn over it.
    newC = ee.ImageCollection.fromImages([filtered, best.mosaic()])
    if verbose:
        print('Size of newC:', newC.size().getInfo())

    return ee.Image(newC.mosaic())

In [36]:
# Tile identifiers to process; this list is what gets passed to
# process_datasource_tiles in the cells that follow.
tile_list = ["T32NMK","T32NML","T32NMM"]

In [37]:
# Load the pipeline configuration and pick the first data source and the
# first sensor entry from it.
config_dict = load_config('config.yml')
source = config_dict['data_list'][0]
sensor = config_dict['sensors'][0]
# Exports go to the configured bucket; the 'drive_folder' alternative is
# kept here, disabled, for reference.
# export_folder = config_dict['drive_folder']
export_folder = config_dict['bucket']

# Print the chosen export destination as a sanity check.
print(export_folder)

project-canopy-temp-2


  return yaml.load(stream)


In [38]:
# Show the data-source entry selected above (config_dict['data_list'][0]).
print(source)

{'name': ['pipeline_v2_test_1'], 'start_date': '2019-01-01', 'end_date': '2020-12-31', 'geometry': 'point', 'size': 20000, 'resolution': 10, 'sort_by': 'name', 'features_src': 'ft:19Vexm10pJcAZ8tTVbl4j0HA8w2muyPPz6-cyvdxI', 'sensors': [0, 1, 2]}


In [39]:
# Show the sensor entry selected above (config_dict['sensors'][0]).
print(sensor)

{'name': 'COPERNICUS/S2_SR', 'prefix': 'S2_CloudFree', 'type': 'opt', 'bands': ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B'], 'filters_after': [{'CLOUDY_PERCENTAGE': {'lte': 10}}]}


In [43]:
# Run the tile pipeline for every tile in tile_list. The return value is not
# assigned, so it is only shown as the cell result.
process_datasource_tiles(source, sensor, export_folder, tile_list)

3 tiles have been loaded
Processing tile T32NMK
processing  1_pipeline_v2_test_1_Nov_13_14:01:47_2020
Inject B10
Making tile collection
Making mosaic
Exporting image
Not using ROI
Processing tile T32NML
processing  2_pipeline_v2_test_1_Nov_13_14:01:49_2020
Inject B10
Making tile collection
Making mosaic
Exporting image
Not using ROI
Processing tile T32NMM
processing  3_pipeline_v2_test_1_Nov_13_14:01:49_2020
Inject B10
Making tile collection
Making mosaic
Exporting image
Not using ROI


[<Task EXPORT_IMAGE: 1_pipeline_v2_test_1_Nov_13_14:01:47_2020 (UNSUBMITTED)>,
 <Task EXPORT_IMAGE: 2_pipeline_v2_test_1_Nov_13_14:01:49_2020 (UNSUBMITTED)>,
 <Task EXPORT_IMAGE: 3_pipeline_v2_test_1_Nov_13_14:01:49_2020 (UNSUBMITTED)>]

  with loop.timer(seconds, ref=ref) as t:


In [49]:
# Run the tile pipeline again and keep what it returns so the following cells
# can inspect it.
collections = process_datasource_tiles(source, sensor, export_folder, tile_list)

3 tiles have been loaded
Processing tile T32NMK
processing  1_pipeline_v2_test_1_Nov_13_14:13:43_2020
Inject B10
Making tile collection
Processing tile T32NML
processing  2_pipeline_v2_test_1_Nov_13_14:13:43_2020
Inject B10
Making tile collection
Processing tile T32NMM
processing  3_pipeline_v2_test_1_Nov_13_14:13:43_2020
Inject B10
Making tile collection


  with loop.timer(seconds, ref=ref) as t:


In [50]:
# Take the first returned item for closer inspection.
coll = collections[0]

In [51]:
# Ask Earth Engine for the number of images in coll; getInfo() blocks until
# the server has evaluated the request.
coll.size().getInfo()

EEException: Collection.reduceColumns: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

In [52]:
# Work with the first tile only while debugging.
tile = tile_list[0]

tile

'T32NMK'

In [55]:
# Rebuild the tile collection by hand, using the date range from the
# data-source config.
start_date = source['start_date']
end_date = source['end_date']
    
main_collection = ee.ImageCollection(sensor['name']) \
                  .filterDate(ee.Date(start_date), ee.Date(end_date))

# Same "system:index contains tile" selection that makeTileCollection applies.
coll = main_collection.filterMetadata('system:index', 'contains', tile)

  with loop.timer(seconds, ref=ref) as t:


In [56]:
# Requests the description of the whole collection in one blocking call.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt;
# coll.size().getInfo() or coll.first().getInfo() transfers far less.
coll.getInfo()

Traceback (most recent call last):
  File "C:\Anaconda3\envs\cloud_removal\lib\site-packages\gevent\_ffi\loop.py", line 269, in python_check_callback
    def python_check_callback(self, watcher_ptr): # pylint:disable=unused-argument
KeyboardInterrupt
2020-11-13T22:17:41Z


KeyboardInterrupt: 

In [57]:
# Fetch the description of only the first image in the collection.
coll.first().getInfo()

141

In [58]:
# Check the client-side Python type of coll (no server request involved).
type(coll)

ee.imagecollection.ImageCollection

In [59]:
# Check the client-side Python type of what first() returns.
type(coll.first())

ee.image.Image

In [61]:
# Keep a handle on the first image and print its full description
# (band list, data types, dimensions, CRS).
img = coll.first()
print(img.getInfo())

{'type': 'Image', 'bands': [{'id': 'B1', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [1830, 1830], 'crs': 'EPSG:32632', 'crs_transform': [60, 0, 399960, 0, -60, 500040]}, {'id': 'B2', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [10980, 10980], 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 399960, 0, -10, 500040]}, {'id': 'B3', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [10980, 10980], 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 399960, 0, -10, 500040]}, {'id': 'B4', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [10980, 10980], 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 399960, 0, -10, 500040]}, {'id': 'B5', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [5490, 5490], 'crs': 'EPSG:32632', 'crs_transform': [20, 0, 399960, 0, -20, 500040]}, {

In [62]:
# Map inject_B10 over every image. NOTE: this rebinds coll, so running the
# cell again maps inject_B10 on top of the previous result.
coll = coll.map(inject_B10)

  with loop.timer(seconds, ref=ref) as t:


In [63]:
# Count the images after mapping inject_B10 (blocking server request).
coll.size().getInfo()

Traceback (most recent call last):
  File "C:\Anaconda3\envs\cloud_removal\lib\site-packages\gevent\_ffi\loop.py", line 269, in python_check_callback
    def python_check_callback(self, watcher_ptr): # pylint:disable=unused-argument
KeyboardInterrupt
2020-11-13T22:21:08Z


KeyboardInterrupt: 

In [65]:
# Remaining per-image modifiers, applied in list order. inject_B10 is not in
# this list because it was already mapped over coll in an earlier cell.
modifiers = [sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

# Each iteration rebinds coll, so re-running this cell stacks the modifiers
# on top of the previous result.
for m in modifiers:
    coll = coll.map(m)

In [66]:
# Count the images after mapping all modifiers (blocking server request).
coll.size().getInfo()

Traceback (most recent call last):
  File "C:\Anaconda3\envs\cloud_removal\lib\site-packages\gevent\_ffi\loop.py", line 269, in python_check_callback
    def python_check_callback(self, watcher_ptr): # pylint:disable=unused-argument
KeyboardInterrupt
2020-11-13T22:22:29Z


KeyboardInterrupt: 

In [67]:
# Full modifier chain, this time with inject_B10 first.
modifiers = [inject_B10, sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

# Build the tile collection through makeTileCollection rather than by hand,
# starting again from main_collection.
coll = makeTileCollection(sensor, tile, main_collection, modifiers)

  with loop.timer(seconds, ref=ref) as t:


In [68]:
# Requests the description of the whole processed collection in one blocking
# call. NOTE(review): the recorded run of this cell ended in a
# KeyboardInterrupt.
coll.getInfo()

Traceback (most recent call last):
  File "C:\Anaconda3\envs\cloud_removal\lib\site-packages\gevent\_ffi\loop.py", line 269, in python_check_callback
    def python_check_callback(self, watcher_ptr): # pylint:disable=unused-argument
KeyboardInterrupt
2020-11-13T22:23:42Z


KeyboardInterrupt: 

In [69]:
# Run the tile pipeline once more and keep its return value for inspection.
collections = process_datasource_tiles(source, sensor, export_folder, tile_list)

3 tiles have been loaded
Processing tile T32NMK
processing  1_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection
Processing tile T32NML
processing  2_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection
Processing tile T32NMM
processing  3_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection


  with loop.timer(seconds, ref=ref) as t:


In [70]:
# Fetch the server-side description of the first returned item.
collections[0].getInfo()

137

In [71]:
# Show the client-side objects returned by process_datasource_tiles.
collections

[<ee.imagecollection.ImageCollection at 0x1218b9f11c0>,
 <ee.imagecollection.ImageCollection at 0x1218ba38700>,
 <ee.imagecollection.ImageCollection at 0x1218ba4acd0>]

In [72]:
# Inspect the first returned collection.
coll = collections[0]

# Blocking request for the description of the whole collection.
coll.getInfo()

EEException: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

In [73]:
def makeTileCollection(sensor, tile, main_collection, modifiers=[]):
    """Instrumented makeTileCollection that prints the collection size per step.

    Same pipeline as the plain version: tile filter, "before" filter,
    modifiers, "after" filter. The printed labels are:

        1: size of main_collection
        2: size after the 'system:index' tile filter
        3 <modifier>: size after mapping that modifier
        4: size after the "after" filter (only when one is configured)

    Every size is fetched with a blocking ``size().getInfo()`` request.

    Args:
        sensor: sensor configuration; handed to makeFilterList, which
            returns the (before, after) filter pair.
        tile: tile identifier searched for inside 'system:index'.
        main_collection: collection to start from.
        modifiers: callables mapped over the collection one after the
            other. The default list is only read, never mutated.

    Returns:
        The filtered and mapped collection.
    """
    pre_filter, post_filter = makeFilterList(sensor)

    total_images = main_collection.size().getInfo()
    print('1:', total_images)

    tile_images = main_collection.filterMetadata('system:index', 'contains', tile)
    print('2:', tile_images.size().getInfo())

    # No size is printed after the "before" filter.
    if pre_filter is not None:
        tile_images = tile_images.filter(pre_filter)

    run_modifiers = bool(modifiers) and len(modifiers) > 0
    if run_modifiers:
        for m in modifiers:
            tile_images = tile_images.map(m)
            print(f'3 {m}:', tile_images.size().getInfo())

    if not post_filter:
        return tile_images

    tile_images = tile_images.filter(post_filter)
    print('4:', tile_images.size().getInfo())
    return tile_images

In [74]:
# Replace main_collection with a one-week window (2020-01-01 to 2020-01-07)
# instead of the date range from the config.
main_collection = ee.ImageCollection(sensor['name']) \
                  .filterDate(ee.Date('2020-01-01'), ee.Date('2020-01-07'))

In [75]:
# Build the tile collection with whichever makeTileCollection definition was
# executed most recently in this kernel.
coll = makeTileCollection(sensor, tile, main_collection, modifiers)

1: 50035
2: 1
3 <function inject_B10 at 0x000001218B1C0700>: 1
3 <function sentinel2CloudScore at 0x000001218B1ABD30>: 1
3 <function calcCloudCoverage at 0x000001218B1C0550>: 1
3 <function sentinel2ProjectShadows at 0x000001218B1C09D0>: 1
3 <function computeQualityScore at 0x000001218B1C0790>: 1


EEException: Collection.reduceColumns: Error in map(ID=20200105T094309_20200105T095703_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

  with loop.timer(seconds, ref=ref) as t:
