In [1]:
# Cloud Mask 2.0 

import ee
import yaml
import time
import os
import json
from argparse import ArgumentParser
from utils import clipToROI, exportImageCollectionToGCS, exportImageToGCS, sentinel2CloudScore, calcCloudCoverage
from utils import GEETaskManager

from gevent.fileobject import FileObjectThread

# Polygon Import from Misha ROI List

import pandas as pd

# Functions for Active Run of Cloud Mask 2.0 

from download_sen12 import *

In [6]:
from download_sen12 import print_me

In [7]:
print_me()

wahoooo!


In [10]:
config_file = "config.yml"

In [11]:
stream = open(config_file, 'r') 

In [12]:
# safe_load only builds plain Python data (dicts, lists, scalars), which is all
# the lookups on `config` below rely on. A bare yaml.load(stream) needs an
# explicit Loader on current PyYAML and can construct arbitrary objects.
config = yaml.safe_load(stream)
# The file handle is not needed once the config has been parsed.
stream.close()

In [13]:
config["data_list"][0]

{'name': ['pipeline_v2_test_5'],
 'start_date': '2019-01-01',
 'end_date': '2020-12-31',
 'geometry': 'point',
 'size': 20000,
 'resolution': 10,
 'sort_by': 'name',
 'features_src': 'ft:19Vexm10pJcAZ8tTVbl4j0HA8w2muyPPz6-cyvdxI',
 'sensors': [0, 1, 2]}

In [None]:
config

# Initialize EE

In [None]:
ee.Initialize()

# Creating New Feature Collection To Use with CM_v2

In [None]:
def import_aois(csv_loc):
    """Read the labelled-polygon CSV and return the polygons' coordinate arrays.

    Args:
        csv_loc: Path (or anything ``pd.read_csv`` accepts) of a CSV that has
            the columns "center-lat", "center-long", "polygon" and
            "Labels combined". Each "polygon" cell is a JSON string with a
            "coordinates" member.

    Returns:
        A list with one entry per CSV row: the parsed "coordinates" value of
        that row's polygon, in file order.

    Raises:
        KeyError: if any of the four expected columns is missing, or a polygon
            JSON object has no "coordinates" member.
    """
    wanted_columns = ["center-lat", "center-long", "polygon", "Labels combined"]
    # Selecting all four columns (not just "polygon") keeps the check that the
    # CSV has the expected layout.
    labels = pd.read_csv(csv_loc)[wanted_columns]

    return [json.loads(geojson_text)["coordinates"] for geojson_text in labels["polygon"]]

polygons = import_aois("D:/canopy_data/csvs/polygons_101320.csv")

# Build one ee.Feature per polygon for the first three entries of the list.
# Each feature's "name" property is its 1-based position in that slice.
features = []
feature_id = 0
for feature_id, poly in enumerate(polygons[0:3], start=1):
    features.append(ee.Feature(ee.Geometry.Polygon(poly), {"name": feature_id}))

# Feature collection that the later cells pass on as `feature_list`.
fc = ee.FeatureCollection(features)

In [None]:
fc

In [None]:
# Build the task manager from the config: worker count, retry limit, log file
# and per-task timeout all come from config.yml.
task_queue = GEETaskManager(n_workers=config['max_tasks'], max_retry=config['max_retry'], wake_on_task=True, log_file=config['log_file'], process_timeout=config['task_timeout'])
# NOTE(review): monitor_tasks and load_task_log are not defined in this notebook;
# presumably they arrive through `from download_sen12 import *` -- confirm.
task_queue.register_monitor(monitor_tasks)

# If a task log from an earlier run exists in the working directory, hand it to
# the task manager before any new task is queued.
if os.path.exists('task_log.json'):
    task_log = load_task_log(filename='task_log.json')
    task_queue.set_task_log(task_log)

# Only the first data_list entry is processed here ([0:1]). Each sensor index it
# lists is resolved against config['sensors'].
for data_list in config['data_list'][0:1]:
    for sensor_idx in data_list['sensors']:
        sensor = config['sensors'][sensor_idx]
        print(sensor)
        # fc is the FeatureCollection built from the polygon CSV in an earlier cell.
        tasks = process_datasource(task_queue, data_list, sensor, config['export_to'], config['export_dest'], feature_list = fc)

print("Waiting for completion...")
task_queue.wait_till_done()

In [None]:
d = {'a': 1, 'b': 2, 'c': 3}

d.keys()

In [None]:
list(d.keys())[0]

In [None]:
list(d.values())[0]

In [None]:
d.items()

In [None]:
e = {'d': d, 'e': 0}

e

In [None]:
list(list(e.values())[0].keys())[0]

In [None]:
# Same set-up as the earlier task-queue cell: the task manager is configured
# from config.yml (worker count, retry limit, log file, per-task timeout).
task_queue = GEETaskManager(n_workers=config['max_tasks'], max_retry=config['max_retry'], wake_on_task=True, log_file=config['log_file'], process_timeout=config['task_timeout'])
# NOTE(review): monitor_tasks and load_task_log are not defined in this notebook;
# presumably they arrive through `from download_sen12 import *` -- confirm.
task_queue.register_monitor(monitor_tasks)

# If a task log from an earlier run exists in the working directory, hand it to
# the task manager before any new task is queued.
if os.path.exists('task_log.json'):
    task_log = load_task_log(filename='task_log.json')
    task_queue.set_task_log(task_log)

# Unlike the earlier cell, this one walks every data_list entry and does not
# pass feature_list, so process_datasource runs with that argument's default.
for data_list in config['data_list']:
    for sensor_idx in data_list['sensors']:
        sensor = config['sensors'][sensor_idx]
        tasks = process_datasource(task_queue, data_list, sensor, config['export_to'], config['export_dest'])

print("Waiting for completion...")
task_queue.wait_till_done()

In [None]:
!pwd

# makeFilterList

In [None]:
def makeFilterList(sensor):
    """Turn a sensor's filter specs into lists of ``ee.Filter`` objects.

    Each spec is a one-entry mapping of the form
    ``{property: {operator: value}}``, e.g. ``{'CLOUDY_PERCENTAGE': {'lte': 10}}``,
    which becomes ``ee.Filter.lte('CLOUDY_PERCENTAGE', 10)``. Only the first
    entry of the outer and of the inner mapping is used.

    Args:
        sensor: Mapping that may contain 'filters_before' and/or
            'filters_after', each a list of specs.

    Returns:
        A ``(filters_before, filters_after)`` tuple. Each element is a list of
        filters, or None when the sensor has no such key.
    """
    def _to_ee_filters(specs):
        built = []
        for spec in specs:
            prop, condition = list(spec.items())[0]
            operator, operand = list(condition.items())[0]
            # Look the operator up by name on ee.Filter (lte, gte, eq, ...).
            built.append(getattr(ee.Filter, operator)(prop, operand))
        return built

    before = _to_ee_filters(sensor['filters_before']) if 'filters_before' in sensor else None
    after = _to_ee_filters(sensor['filters_after']) if 'filters_after' in sensor else None

    return before, after

In [None]:
type(config)

In [None]:
config.keys()

In [None]:
config

In [None]:
config['sensors']

In [None]:
sensor = config['sensors'][0]
sensor

In [None]:
# Default values
filters_before = None
filters_after = None

In [None]:
# Sub function
def _build_filters(filter_list):
    # filter_list is a list of dictionaries. Includes the attributes for filtering an image collection
    filters = []
    # for each dict in filter_list
    # example: {'CLOUDY_PERCENTAGE': {'lte': 10}}
    for f in filter_list:
        # key is the first key of the dict -- the feature you're trying to filter by
        # example: 'CLOUDY_PERCENTAGE'
        key = list(f.keys())[0]
        # op is the key of the nested dictionary
        # example: 'lte'
        op = list(list(f.values())[0].keys())[0]
        # val is the value of the nested dictionary
        # example: 10
        val = list(list(f.values())[0].values())[0]
        # Make an ee.Filter object that matches the input filter dict
        # example: ee.Filter.lte('CLOUDY_PERCENTAGE', 10)
        # This will then get applied to an image_collection object
        filters.append(getattr(ee.Filter, op)(key, val))

In [None]:
help(ee.Filter)

In [None]:
# Because of the sorting (probably), you may want to apply filters specifically
# before or after creating the image_collection object.
# So we have separate filter lists for both before and after.
if 'filters_before' in sensor:
    filters_before = _build_filters(sensor['filters_before'])

if 'filters_after' in sensor:
    filters_after = _build_filters(sensor['filters_after'])
    
# So at the end, we build a list of ee.Filter objects based off of the sensor
# values for its 'filters_before' and 'filters_after' keys. If the sensor
# lacks one or both such keys, the filters_before and filters_after retain
# their default None value (i.e. no filters get applied).

In [None]:
ee.Initialize()

In [None]:
makeFilterList(sensor)

## makeImageCollection

In [None]:
def makeImageCollection(sensor, roi, start_date, end_date, modifiers=[]):
    """Build the filtered, band-selected image collection for one sensor and ROI.

    Args:
        sensor: Mapping with 'name' (the collection id), 'bands' (the bands to
            return) and optional 'filters_before' / 'filters_after' specs as
            understood by makeFilterList.
        roi: Region of interest; used for filterBounds and, wrapped in
            ee.Geometry, as the region handed to clipToROI.
        start_date: Start of the date range (passed to ee.Date).
        end_date: End of the date range (passed to ee.Date).
        modifiers: Optional list of functions applied in order with
            ``collection.map``. The default list is never mutated here.

    Returns:
        The collection after date/bounds filtering, clipping, the "before"
        filters, the modifiers and the "after" filters, reduced to
        ``sensor['bands']``.
    """
    # Make the filters based off of the previous function
    filters_before, filters_after = makeFilterList(sensor)

    # Make an image collection. Take the name from the sensor.
    # Filter by date based off of start_date and end_date.
    # Filter bounds based off of the ROI.
    # The chain is parenthesised rather than backslash-continued: a comment line
    # in the middle of a backslash chain ends the statement and leaves the
    # following ".map(...)" line as a syntax error.
    collection = (
        ee.ImageCollection(sensor['name'])
        .filterDate(ee.Date(start_date), ee.Date(end_date))
        .filterBounds(roi)
        # The lambda is needed: map() hands the mapped function one image,
        # while clipToROI is called here with two arguments (image and ROI).
        .map(lambda x: clipToROI(x, ee.Geometry(roi)))
    )

    # If there are filters_before, apply them. The truthiness test (same as for
    # filters_after below) also skips an empty list instead of passing it on.
    if filters_before:
        collection = collection.filter(filters_before)

    # If there are additional functions you want to apply, put them in the
    # "modifiers" list and then they will be applied in turn using the 'map' method
    if modifiers:
        for m in modifiers:
            collection = collection.map(m)

    # If there are filters_after, apply them
    if filters_after:
        collection = collection.filter(filters_after)

    # 'sensor' states the specific bands you want to take in the 'bands' value.
    # Return those bands of the image collection.
    # This is done at the end just in case other bands are used in custom (pre-)processing--
    # i.e., in the "modifiers" list
    return collection.select(sensor['bands'])

## process_datasource

In [None]:
def process_datasource(task_queue, source, sensor, export_to, export_dest, feature_list = None):
    """Queue one export task per feature of a data source for a single sensor.

    Args:
        task_queue: Object whose ``add_task(task_params, blocking=True)`` is
            called once per feature.
        source: Data-source mapping. Keys read here: 'sort_by', 'geometry',
            'size' (only for point geometry), 'name', 'resolution',
            'start_date', 'end_date', and 'features_src' when no feature_list
            is given.
        sensor: Sensor mapping; its optional 'prefix' is prepended to the
            export path parts. It is passed through to the task unchanged
            otherwise.
        export_to: Accepted for interface compatibility; not read here.
        export_dest: Stored as the 'bucket' entry of the export parameters.
        feature_list: ee.FeatureCollection to iterate. When None, the
            collection is loaded from ``source['features_src']``.

    Returns:
        The list of task ids that were queued, in feature order.

    Side effects:
        A string ``source['name']`` or ``sensor['prefix']`` is replaced in
        place by a one-element list.
    """
    if feature_list is None:
        # Fall back to the source's own feature table instead of failing on None.
        feature_list = ee.FeatureCollection(source['features_src'])

    feature_list = feature_list.sort(source['sort_by']).toList(feature_list.size())
    n_features = feature_list.size().getInfo()

    print("{} features have been loaded".format(n_features))

    # Name parts do not depend on the feature, so normalise them once up front.
    if isinstance(source['name'], str):
        source['name'] = [source['name']]

    # 'prefix' is optional: only touch it after checking that it is present.
    if 'prefix' in sensor:
        if isinstance(sensor['prefix'], str):
            sensor['prefix'] = [sensor['prefix']]
        filename_parts = sensor['prefix'] + source['name']
    else:
        filename_parts = source['name']

    task_list = []

    # ee.List.get is zero-based, so start at 0; starting at 1 skipped the first feature.
    for i in range(n_features):
        feature_point = ee.Feature( feature_list.get(i) )

        if source['geometry'] == "point":
            # Grow the point by 'size' and take the bounding box of the result.
            feature_point = feature_point.buffer(source['size']).bounds()

        # The ROI is sent to the task as plain coordinates, not as an ee object.
        roi = feature_point.geometry()
        roi = roi.coordinates().getInfo()

        filename = "_".join(source['name'] + [str(i)])
        dest_path = "/".join(filename_parts + [filename])

        export_params = {
            'bucket': export_dest,
            'resolution': source['resolution'],
            'filename': filename,
            'dest_path': dest_path
        }

        task_params = {
            'action': export_single_feature,
            'id': "_".join(filename_parts + [str(i)]), # This must be unique per task, to allow to track retries
            'kwargs': {
                'roi': roi,
                'export_params': export_params,
                'sensor': sensor,
                'date_range': {'start_date': source['start_date'], 'end_date': source['end_date']}
            }
        }

        task_queue.add_task(task_params, blocking=True)
        task_list.append(task_params['id'])

    return task_list

In [None]:
# Signature that the following cells walk through step by step. It is kept as a
# comment: a bare "def" line with no colon or body is a SyntaxError, and a real
# second definition would shadow the working function defined above.
# def process_datasource(task_queue, source, sensor, export_to, export_dest, feature_list = None)
### NOTE: We're going to remove the task_queue probably

In [None]:
# the feature_list is an ee.FeatureCollection
# This sorts the feature_list by the parameter in source['sort_by']
feature_list = feature_list.sort(source['sort_by']).toList(feature_list.size())
# get the number of features in the feature_list
n_features = feature_list.size().getInfo()

In [None]:
fc

In [None]:
fc.toList(fc.size())

In [None]:
help(fc.toList)

In [None]:
fc.size().getInfo()

In [None]:
type(fc.size())

In [None]:
help(ee.ee_number.Number)

In [None]:
task_list = []
# This variable is not used so I don't know why it's defined

In [None]:
for i in range(1, n_features):
    feature_point = ee.Feature( feature_list.get(i) )

In [None]:
feature_list = fc.toList(fc.size())

In [None]:
type(feature_list)

In [None]:
help(feature_list.get)

In [None]:
feature_list.get(0).getInfo()

In [None]:
feature_list.get(1).getInfo()

In [None]:
## CHANGE:

for i in range(0, n_features):
    # Loop through each feature. Pull out the feature--
    # need to put it inside an "ee.Feature" because otherwise
    # it's a "ComputedObject."
    feature_point = ee.Feature( feature_list.get(i) )

In [None]:
feature_point = ee.Feature( feature_list.get(0) )

In [None]:
type(feature_list.get(0))

In [None]:
if source['geometry'] == "point":
    # If the feature is a point, then create a bounding box based off
    # of the "size" attribute from 'source', using the defined
    # feature as the centroid.
    feature_point = feature_point.buffer(source['size']).bounds()

In [None]:
help(feature_point.buffer)

In [None]:
help(feature_point.bounds)

In [None]:
# Get the coordinates of feature_point as the ROI
roi = feature_point.geometry()
roi = roi.coordinates().getInfo()

In [None]:
## if type(source['name']) == str
if isinstance(source['name'], str):
    # make it into a list, so we don't error out
    source['name'] = [source['name']]
    
# same as above
if isinstance(sensor['prefix'], str):
    sensor['prefix'] = [sensor['prefix']]

In [None]:
# make a list 'filename_parts', with all the prefixes
# first (if there are prefixes), then all of the names.
# Keep in mind that at this point, we're working on a single source
# and a single sensor, so really there's just one prefix and one name;
# however, these each might be divided into parts and put into a list
# so that we can then join all the parts together later.
if 'prefix' in sensor:
    filename_parts = sensor['prefix'] + source['name']
else:
    filename_parts = source['name']

In [None]:
# Filename is the source name, underscore, then an integer
# (the integer depends on which feature we're working on)
filename = "_".join(source['name'] + [str(i)])
# dest_path is the filename parts joined by forward slashes, followed by the filename
dest_path = "/".join(filename_parts + [filename])

In [None]:
# Define export parameters
export_params = {
    # export bucket is one of the arguments to the overall function
    'bucket': export_dest,
    # resolution comes from the source
    'resolution': source['resolution'],
    # filename and dest_path defined above
    'filename': filename,
    'dest_path': dest_path
}

In [None]:
# Define task parameters for the async stuff
task_params = {
    # Function to run: export_single_feature
    'action': export_single_feature,
    # ID for the async stuff to track each task
    'id': "_".join(filename_parts + [str(i)]), # This must be unique per task, to allow to track retries
    'kwargs': {
        # kwargs come from the variables defined in-function
        'roi': roi,
        'export_params': export_params,
        'sensor': sensor,
        'date_range': {'start_date': source['start_date'], 'end_date': source['end_date']}
    }
}

In [None]:
# async line
task_queue.add_task(task_params, blocking=True)

In [None]:
l1 = [1, 2, 3]
l2 = [4, 5, 6]
l1 + l2

In [None]:
config['sensors']

In [None]:
sensor = config['sensors'][0]

In [None]:
config.keys()

In [None]:
config['data_list']

In [None]:
source = config['data_list'][0]

In [None]:
source['name']

## export_single_feature

In [None]:
def export_single_feature(roi=None, sensor=None, date_range=None, export_params=None):
    """Mosaic the imagery for one ROI and hand it to exportImageToGCS.

    Args:
        roi: Polygon coordinates as nested lists; ``roi[0]`` is used to build
            the ee polygon, and the full value is forwarded as the export's 'roi'.
        sensor: Sensor mapping; 'type' decides whether the cloud-scoring
            modifiers are applied, and the mapping is passed on to
            makeImageCollection.
        date_range: Mapping with 'start_date' and 'end_date'.
        export_params: Base keyword arguments for exportImageToGCS; copied, not
            modified.

    Returns:
        Whatever exportImageToGCS returns.
    """
    # Only optical sensors ("opt", case-insensitive) get the cloud modifiers.
    is_optical = sensor['type'].lower() == "opt"
    modifiers = [sentinel2CloudScore, calcCloudCoverage] if is_optical else None

    collection = makeImageCollection(
        sensor,
        ee.Geometry.Polygon(roi[0]),
        date_range['start_date'],
        date_range['end_date'],
        modifiers=modifiers,
    )
    mosaic = ee.Image(collection.mosaic())

    # Work on a copy so the caller's export_params stays untouched.
    gcs_kwargs = export_params.copy()
    gcs_kwargs.update({'img': mosaic, 'roi': roi})

    return exportImageToGCS(**gcs_kwargs)

In [None]:
# roi, sensor, date_range, export_params
export_single_feature(roi=None, sensor=None, date_range=None, export_params=None)

In [None]:
# default modifiers value
modifiers = None
# if the sensor type is "opt" (optical)
if sensor['type'].lower() == "opt":
    #print(sensor['type'])
    # then the modifiers is the following two functions from utils.
    # the only reason to run these functions is if you're getting
    # optical products (i.e. rasters)
    modifiers = [sentinel2CloudScore, calcCloudCoverage]

In [None]:
# Getting the RoI as an EE Geometry (Polygon) object
roi_ee = ee.Geometry.Polygon(roi[0])

# run the makeImageCollection function that is pulled from utils
image_collection = makeImageCollection(sensor, roi_ee, date_range['start_date'], date_range['end_date'], modifiers=modifiers)

# get a single image by mosaicing the image collection.
# this will naturally do a pixel replacement (i.e. we're flattening the products)
img = ee.Image(image_collection.mosaic())

In [None]:
# copy the export_params, then add 'img' and 'roi' key/value pairs
new_params = export_params.copy()
new_params['img'] = img
new_params['roi'] = roi

In [None]:
# run exportImageToGCS (pulled from utils) on the new_params.
# This is the last line of export_single_feature, shown for reference only:
# a "return" at cell level (outside a function) is a SyntaxError.
# return exportImageToGCS(**new_params)

# TESTING (ZHENYA START HERE)

In [1]:
import ee
import yaml
import time
import os
import json
import pandas as pd
from utils import exportImageToGDrive,exportImageToGCS
from download_sen12 import *

ee.Initialize()

In [2]:
def import_aois(csv_loc,Full_Congo_Pull = False):
    """Load polygons from a CSV and return them as an ee.FeatureCollection.

    Args:
        csv_loc: Path of the CSV to read with ``pd.read_csv``.
        Full_Congo_Pull: When True, the CSV is read as a tile list whose rows
            unpack (after the index) into a tile name and a JSON coordinate
            string; each feature is named after its tile. When False, the CSV
            is the labelled-polygon list with the columns "center-lat",
            "center-long", "polygon" and "Labels combined"; features are named
            1, 2, 3, ... in file order.

    Returns:
        ee.FeatureCollection with one polygon feature per CSV row.
    """
    if Full_Congo_Pull:
        tiles_df = pd.read_csv(csv_loc)
        # itertuples() yields (index, <columns...>); the unpacking expects
        # exactly two data columns.
        features = [
            ee.Feature(ee.Geometry.Polygon(json.loads(coords_json)), {"name": tile_name})
            for _, tile_name, coords_json in tiles_df.itertuples()
        ]
    else:
        labels_df = pd.read_csv(csv_loc)[["center-lat","center-long","polygon","Labels combined"]]

        # Parse every polygon first, then wrap each one in a numbered feature.
        rings = [json.loads(geojson_text)["coordinates"] for geojson_text in labels_df["polygon"]]
        features = [
            ee.Feature(ee.Geometry.Polygon(ring), {"name": position})
            for position, ring in enumerate(rings, start=1)
        ]

    return ee.FeatureCollection(features)


# ### CHANGE BELOW PATH ###
zhenya_label_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v3/Polygon_List/polygons_101320.csv"
# david__label_path = 'D:/canopy_data/csvs/polygons_101320.csv'
zhenya_tilelist_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Geometry/tile_lists/shub_tile_list/tiles_polygons_full.csv"

In [None]:
config_dict = load_config('config.yml')
source = config_dict['data_list'][0]
sensor = config_dict['sensors'][0]
# export_folder = config_dict['drive_folder']
export_folder = config_dict['bucket']

fc = import_aois(zhenya_label_path,Full_Congo_Pull = False)

In [4]:
exports = process_datasource(source, sensor, export_folder, fc)

101 features have been loaded


In [6]:
images

[(<ee.image.Image at 0x13d039490>, <ee.image.Image at 0x13d3a2070>),
 (<ee.image.Image at 0x13d13fd30>, <ee.image.Image at 0x13d13fcd0>),
 (<ee.image.Image at 0x13d159d00>, <ee.image.Image at 0x13d159ca0>)]

In [7]:
import folium
import geemap.eefolium as gmap

TCI_RGB = ['TCI_R', 'TCI_G', 'TCI_B']
vis = {'bands': TCI_RGB, "gamma": [2, 2, 2]}


def visualize_geo(coords):
    """Return a folium map showing the polygon given by ``coords``.

    Args:
        coords: Coordinates accepted by ``ee.Geometry.Polygon``.

    Returns:
        The map, centred on the polygon at zoom level 3, with the polygon added
        as the layer 'default display' and a layer control attached.
    """
    folium_map = gmap.Map()
    polygon = ee.Geometry.Polygon(coords)
    folium_map.centerObject(polygon, 3)
    folium_map.add_layer(polygon, {}, 'default display')
    layer_switcher = folium.LayerControl()
    folium_map.add_child(layer_switcher)
    return folium_map

def visualize_raster(img):
    """Return a folium map showing ``img`` with the notebook-level ``vis`` settings.

    Args:
        img: Image object to display.

    Returns:
        The map, centred on the image at zoom level 10, with the image layer
        and a layer control attached.
    """
    folium_map = gmap.Map()
    folium_map.centerObject(img, 10)
    # `vis` is the visualisation dict defined at cell level next to this function.
    folium_map.addLayer(img, vis)
    layer_switcher = folium.LayerControl()
    folium_map.add_child(layer_switcher)
    return folium_map

In [7]:
visualize_raster(images[0][0])

In [8]:
# With Pre-Filter Sort Removed

# visualize_raster(images[0][1])

In [15]:
# Cloud Pixel Percentage Merge 1

visualize_raster(images[0][1])

In [16]:
# Cloud Pixel Percentage Merge 2

visualize_raster(images[1][1])

In [17]:
# Cloud Pixel Percentage Merge 3

visualize_raster(images[2][1])

# Visualizing Sort Comparison

In [8]:
visualize_raster(images[0][0])

In [9]:
visualize_raster(images[0][1])

In [10]:
visualize_raster(images[1][0])

In [11]:
visualize_raster(images[1][1])

In [12]:
visualize_raster(images[2][0])

In [5]:
visualize_raster(images[2][1])

NameError: name 'visualize_raster' is not defined

In [19]:
name = 'manual_image_export' 
time_stamp = "_".join(time.ctime().split(" ")[1:])
filename = "_".join([name] + [time_stamp])
print(filename)

def export_single_image(img,dest_path = 'S2_CloudFree/s2cloudless_polygon_export/',filename = 'zhenya_mosaic_test' ):
    """Start a Cloud Storage export of a single image and return the task.

    Args:
        img: Image to export.
        dest_path: Prefix (folder path) inside the bucket; the generated export
            name is appended to it.
        filename: Base name of the export. The final name is
            ``0_<filename>_<timestamp>``.

    Returns:
        The started export task, so the caller can check its status.
    """
    # split() with no argument collapses the double space that ctime() uses for
    # single-digit days; split(" ") left an empty piece there ("Nov__7_...").
    time_stamp = "_".join(time.ctime().split()[1:])
    # Use the filename argument. It used to be overwritten with the
    # notebook-level variable `name`, so the parameter had no effect.
    export_name = "_".join([str(0), filename, time_stamp])
    dest_path_full = dest_path + export_name
    export = ee.batch.Export.image.toCloudStorage(
      image=img,
      description=export_name,
      scale=10,
      fileNamePrefix=dest_path_full,
      bucket='project-canopy-temp-2',
      maxPixels=1e13
    )
    export.start()
    return export

manual_image_export_Nov_17_18:40:27_2020


In [20]:
export_single_image(images[0][1])

### Planning

Flow of our pipeline--

1. Process Datasource: Sort the feature list and go through each feature one at a time.

2. Export Single Feature: put inputs in the right format for makeImageCollection

3. Make Image Collection: Queries images, applies filters and the map functions

4. Export Single Feature (again): Merges collection, then exports it

To generalize:

1. Split feature list

2. For each feature, make the appropriate image collection

3. Mosaic

4. Export

Proposal is to make a separate image collection for each Tile ID.

1. Put a "tile list" in the config file, as well as a single polygon.

2. Go through each tile in the tile list one at a time.

3. Make an image collection containing only products with that tile.

4. Mosaic that image collection.

5. If any parts of the mosaic lie outside the polygon, clip it. (<-- this part needs testing to see precisely what to do)

6. Export the mosaic tile. (Then repeat for each tile)

# Tile-based approach

In [109]:
import ee
import yaml
import time
import os
import json
import pandas as pd
from utils import exportImageToGDrive,exportImageToGCS
from download_sen12 import *

ee.Initialize()

In [110]:
# def process_datasource_tiles(source, sensor, export_folder, tile_list, main_polygon=None, pre_mosaic_sort='CLOUDY_PERCENTAGE'):

#     n_tiles = len(tile_list)

#     print(f'{n_tiles} tiles have been loaded')

#     exports = []
    
# #     mosaic_imgs = []
    
#     start_date = source['start_date']
#     end_date = source['end_date']
    
#     main_collection = ee.ImageCollection(sensor['name']) \
#                         .filterDate(ee.Date(start_date), ee.Date(end_date))
    
#     if main_polygon:
#         main_polygon = ee.Geometry.Polygon(main_polygon)
#         main_collection = main_collection.filterBounds(main_polygon)

#     if isinstance(source['name'], str):
#         source['name'] = [source['name']]

#     if 'prefix' in sensor:
#         if isinstance(sensor['prefix'], str):
#             sensor['prefix'] = [sensor['prefix']]
#         filename_parts = sensor['prefix'] + source['name']
#     else:
#         filename_parts = source['name']

#     for i, tile in enumerate(tile_list):
#         print(f'Processing tile {tile}')
        
#         time_stamp = "_".join(time.ctime().split(" ")[1:])
#         filename = "_".join([str(i + 1)] + source['name'] + [time_stamp])
#         print("processing ",filename)
#         dest_path = "/".join(filename_parts + [filename])

#         export_params = {
#             'bucket': export_folder,
#             'resolution': source['resolution'],
#             'filename': filename,
#             'dest_path': dest_path
#         }

#         export = export_single_tile(
#             tile=tile,
#             main_collection=main_collection,
#             sensor=sensor,
#             export_params=export_params,
#             sort_by=pre_mosaic_sort
#         )

#         exports.append(export)
# #         mosaic_imgs.append(mosaic)
        


#     return exports


In [111]:
# def export_single_tile(tile, main_collection, sensor=None, export_params=None, sort_by='CLOUDY_PERCENTAGE'):
#     modifiers = []
#     if sensor['name'].lower() == "copernicus/s2_sr":
#         print('Inject B10')
#         modifiers.append(inject_B10)
#     if sensor['type'].lower() == "opt":
#         #print(sensor['type'])
#         modifiers += [sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

#     print('Making tile collection')
#     tile_collection = makeTileCollection(sensor, tile, main_collection, modifiers=modifiers)
#     tile_collection = tile_collection.sort(sort_by)
# #     print("sorting tile collection")
# #     print('Size of collection (pre-Mosaic):',tile_collection.size().getInfo())
# #     return tile_collection

#     print('Making mosaic')
#     image_coords = tile_collection.first().get('system:footprint').getInfo()["coordinates"]
#     print("received image coordinates for the tile")
#     image_ROI = ee.Geometry.Polygon(image_coords)
#     cloudFree = mergeCollection(tile_collection)
#     cloudFree = cloudFree.clip(image_ROI)
#     cloudFree = cloudFree.reproject('EPSG:4326', None, 10)

#     new_params = export_params.copy()
#     new_params['img'] = cloudFree
#     new_params['roi'] = image_coords
#     new_params['sensor_name'] = sensor['name'].lower()
    
# #     return cloudFree,newC
    
#     return exportImageToGCS(**new_params),cloudFree

In [112]:
# def makeTileCollection(sensor, tile, main_collection, modifiers=[]):
#     filters_before, filters_after = makeFilterList(sensor)

#     collection = main_collection.filterMetadata('system:index', 'contains', tile)
                
#     if filters_before is not None:
#         collection = collection.filter( filters_before )

#     if modifiers and len(modifiers) > 0:
#         for m in modifiers:
#             collection = collection.map(m)

#     if filters_after:
# #         print(filters_after[0])
# #         collection = collection.filter(ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', 10))
#         collection = collection.filter( filters_after )

#     return collection

In [113]:
# def exportImageToGCS(img=None, roi=None, bucket=None, filename=None, dest_path=None, resolution=10, start=True, sensor_name=None):
#     ## same as in the JS version

#     print('Exporting image')
    
#     if sensor_name == 'copernicus/s2':
#         img = img.select(['B4', 'B3', 'B2'])
#     elif sensor_name == 'copernicus/s2_sr':
#         img = img.select(['TCI_R', 'TCI_G', 'TCI_B'])

#     if roi:
#         print('Using ROI')
#         export = ee.batch.Export.image.toCloudStorage(
#           image=img,
#           description=filename,
#           scale=resolution,
#           region=roi,
#           fileNamePrefix=dest_path,
#           bucket=bucket,
#           maxPixels=1e13
#         )
#     else:
#         print('Not using ROI')
#         export = ee.batch.Export.image.toCloudStorage(
#             image=img,
#             description=filename,
#             scale=resolution,
#             fileNamePrefix=dest_path,
#             bucket=bucket,
#             maxPixels=1e13
#         )
    
# #     print()

#     if start:
#         export.start()

#     return(export)

In [114]:
# def mergeCollection(imgC, keepThresh=5, filterBy='CLOUDY_PERCENTAGE', filterType='less_than', mosaicBy='cloudShadowScore'):
#     # Select the best images, which are below the cloud free threshold, sort them in reverse order (worst on top) for mosaicing
#     ## same as the JS version
#     best = imgC.filterMetadata(filterBy, filterType, keepThresh).sort(filterBy, False)
# #     print('Size of best:', best.size().getInfo())
# #     print('Info on first image of collection:', imgC.first().getInfo()["bands"])
#     filtered = imgC.qualityMosaic(mosaicBy)
# #     print('Type of filtered:', type(filtered))

#     # Add the quality mosaic to fill in any missing areas of the ROI which aren't covered by good images
#     newC = ee.ImageCollection.fromImages( [filtered, best.mosaic()] )
    
# #     print('Size of newC:', newC.size().getInfo())

#     return ee.Image(newC.mosaic())

In [115]:
# tile_list = ["T32NMK","T32NML","T32NMM"]

In [116]:
# config_dict = load_config('config.yml')
# source = config_dict['data_list'][0]
# sensor = config_dict['sensors'][0]
# # export_folder = config_dict['drive_folder']
# export_folder = config_dict['bucket']

# print(export_folder)

In [9]:
print(source)

{'name': ['pipeline_v2_test_1'], 'start_date': '2019-01-01', 'end_date': '2020-12-31', 'geometry': 'point', 'size': 20000, 'resolution': 10, 'sort_by': 'name', 'features_src': 'ft:19Vexm10pJcAZ8tTVbl4j0HA8w2muyPPz6-cyvdxI', 'sensors': [0, 1, 2]}


In [10]:
print(sensor)

{'name': 'COPERNICUS/S2_SR', 'prefix': 'S2_CloudFree', 'type': 'opt', 'bands': ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B'], 'filters_after': [{'CLOUDY_PERCENTAGE': {'lte': 10}}]}


In [16]:
main_polygon = [[[5.493164,8.276727],[5.449219,-5.703448],[31.376953,-4.959615],[31.157227,8.711359],[5.493164,8.276727]]]
main_polygon_geo = ee.Geometry.Polygon(main_polygon)

In [17]:
main_polygon_geo

ee.Geometry({
  "functionInvocationValue": {
    "functionName": "GeometryConstructors.Polygon",
    "arguments": {
      "coordinates": {
        "constantValue": [
          [
            [
              5.493164,
              8.276727
            ],
            [
              5.449219,
              -5.703448
            ],
            [
              31.376953,
              -4.959615
            ],
            [
              31.157227,
              8.711359
            ],
            [
              5.493164,
              8.276727
            ]
          ]
        ]
      },
      "evenOdd": {
        "constantValue": true
      }
    }
  }
})

In [12]:
# mosaic_imgs = process_datasource_tiles(source, sensor, export_folder, tile_list, main_polygon=None)

In [13]:
collections = process_datasource_tiles(source, sensor, export_folder, tile_list, main_polygon)

3 tiles have been loaded
Processing tile T32NMK
processing  1_pipeline_v2_test_1_Nov_14_23:16:33_2020
Inject B10
Making tile collection
Making mosaic


KeyboardInterrupt
2020-11-15T04:17:54Z


KeyboardInterrupt: 

In [14]:
collections

[<ee.imagecollection.ImageCollection at 0x118163df0>,
 <ee.imagecollection.ImageCollection at 0x118180610>,
 <ee.imagecollection.ImageCollection at 0x1181ac490>]

In [151]:
img

<ee.image.Image at 0x137e44c10>

In [152]:
img.select(['TCI_R']).getInfo()

KeyboardInterrupt
2020-11-15T00:21:15Z


KeyboardInterrupt: 

In [102]:
%%time

coll.getInfo()

KeyboardInterrupt
2020-11-14T23:58:58Z


KeyboardInterrupt: 

In [19]:
tile = tile_list[0]

tile

'T32NMK'

# Create Polygon List for Every Tile Within Congo Basin

In [40]:
df_tiles = pd.read_csv("/Users/purgatorid/Documents/GitHub/canopy-gis/data_collection/data/Geometry/Tile_list/tiles_polygons_full_v2.csv")

In [41]:
df_tiles

Unnamed: 0,Tile_ID,Polygon
0,32MMD,"[[8.100490685394892, -1.8974285062763014], [8...."
1,32MME,"[[8.260416537224202, -0.9930685574564732], [8...."
2,32NNG,"[[8.999730861504322, 1.8098298376179167], [8.9..."
3,32NNH,"[[9.076459111593785, 2.714261301620153], [9.07..."
4,32NNJ,"[[9.275359188516656, 3.6191061888902563], [9.2..."
...,...,...
303,35NPB,"[[27.898993787527058, 1.8096057131920158], [27..."
304,35NNC,"[[26.999730687673807, 2.714184965229428], [26...."
305,35NPC,"[[27.899550592739754, 2.7138487059430134], [27..."
306,35NPD,"[[27.900331529723736, 3.618620781907565], [27...."


In [42]:
df_tiles.to_json(r'/Users/purgatorid/Documents/GitHub/canopy-gis/data_collection/data/Geometry/Tile_list/tiles_polygons_full_v2.json')



In [12]:
df_tiles

Unnamed: 0,Tilename
0,32MLD
1,32MMD
2,32MLE
3,32MME
4,32NLF
...,...
436,35NPG
437,35NQG
438,35NPH
439,35NQH


In [16]:
tile_list = list(df_tiles["Tilename"])

In [17]:
# Old Basin Footprint
# Full_Basin = [[[5.493164,8.276727],[5.449219,-5.703448],[31.376953,-4.959615],[31.157227,8.711359],[5.493164,8.276727]]]
# main_polygon = ee.Geometry.Polygon(Full_Basin)

# New Basin Footprint (Polygon)

congo_basin_polygon =  [ [ [ 7.984666643405897, 4.815355316207002 ], 
     [ 7.58931630518685, 5.38007165110783 ], 
     [ 8.830437227602509, 6.79043633567108 ], 
     [ 11.344844023253321, 5.88973312574918 ], 
     [ 13.086055715022702, 6.459288352028883 ], 
     [ 14.713356361536142, 4.913352737841115 ], 
     [ 15.825846063036201, 4.92875495204759 ], 
     [ 20.392054192163812, 4.065945287129826 ], 
     [ 23.515743368893602, 6.112746427228712 ], 
     [ 30.271751714491138, 2.304938798701089 ], 
     [ 28.729612060844197, -4.391749834258915 ], 
     [ 25.247082155918701, -5.00021242727766 ], 
     [ 25.224878557828482, -3.093418809161228 ], 
     [ 24.233313983593739, -6.329050577716721 ], 
     [ 21.046773661925535, -6.730850974674368 ], 
     [ 19.220211818204795, -4.688071477466952 ], 
     [ 18.156368465602867, -3.527221937615936 ], 
     [ 14.339614843514214, -0.463710978407027 ], 
     [ 14.065691348308377, -1.245844321537668 ], 
     [ 14.085143295884563, -2.004470277008942 ], 
     [ 14.08479826967783, -3.811623581552471 ], 
     [ 12.742958913127694, -3.832953349170474 ], 
     [ 12.101044643113539, -3.191039079156319 ], 
     [ 12.295564118875404, -3.813501401594287 ], 
     [ 12.665389115203187, -4.121262889481581 ], 
     [ 13.764047274806716, -5.081793761883372 ], 
     [ 12.761977829944714, -6.138350729076138 ], 
     [ 8.385722656061915, -0.623381999099699 ], 
     [ 9.241608349414122, -0.253794995152155 ], 
     [ 9.241608349414122, 0.757706278809543 ], 
     [ 9.805714829123531, 3.208651673409044 ], 
     [ 9.047088873652257, 3.986729576456504 ], 
     [ 8.716405764857086, 4.434124370708794 ], 
     [ 8.334243029399895, 4.310494140285494 ], 
     [ 7.984666643405897, 4.815355316207002 ] ] ] 

main_polygon = ee.Geometry.Polygon(congo_basin_polygon)


# Date window for the query. `source` must already exist in the kernel;
# another cell of this notebook sets it from config_dict['data_list'][0].
start_date = source['start_date']
end_date = source['end_date']
    
# Every image of the sensor's collection that falls inside the date window
# and intersects the basin polygon. `sensor` likewise comes from another cell
# (config_dict['sensors'][0]).
main_collection = ee.ImageCollection(sensor['name']) \
                  .filterDate(ee.Date(start_date), ee.Date(end_date)) \
                  .filterBounds(main_polygon) 


# tile name -> [collection, (tile name, footprint coordinates)]
good_collections = {}
# tile names whose filtered collection turned out to be empty
bad_collections = []

for tile in tile_list:
    # Keep only the images whose 'system:index' contains the tile name.
    coll = main_collection.filterMetadata('system:index', 'contains', tile)
    # Each getInfo() call is a blocking request to Earth Engine, so this loop
    # makes up to two round trips per tile.
    if coll.size().getInfo() > 0:
        # NOTE: the comma makes `footprint` a (tile, coordinates) tuple. The
        # coordinates come from the first image's 'system:footprint' property.
        footprint = tile, coll.first().get('system:footprint').getInfo()["coordinates"]
        print(footprint)
        good_collections[tile] = [coll,footprint]
    else:
        print(tile," was not found or outside of Congo Basin")
        bad_collections.append(tile)
    

32MLD  was not found or outside of Congo Basin
('32MMD', [[8.100490685394892, -1.8974285062763014], [8.100505430072314, -1.8974313891451668], [9.0875786263505, -1.8976643591366447], [9.08762006267571, -1.897627768981458], [9.087668672323, -1.8975947485159605], [9.087631708734335, -0.9043676329109878], [9.087595167121314, -0.9043261659513959], [9.087562085583112, -0.904277643933334], [8.279911765445057, -0.9042068101044525], [8.279881702799077, -0.9042119948923486], [8.279861661041883, -0.9042221283242335], [8.276615218645674, -0.9063997482525754], [8.27659810040108, -0.9064221974991973], [8.269045667333431, -0.9194488510838009], [8.256092458123073, -0.9433412814212476], [8.109189814560382, -1.6120169708037368], [8.102687008842823, -1.6793232332020631], [8.100517175834433, -1.7053793607992822], [8.100422959881056, -1.8973413629618878], [8.100459529984631, -1.897382818326495], [8.100490685394892, -1.8974285062763014]])
32MLE  was not found or outside of Congo Basin
('32MME', [[8.26041653

In [42]:
coll.size().getInfo()

0

In [19]:
len(good_collections)

308

In [20]:
# Map each tile name to its footprint coordinates. Every good_collections
# value is [collection, (tile, coordinates)], so [1][1] selects the coordinates.
tile_polygons = {
    tile_id: entry[1][1]
    for tile_id, entry in good_collections.items()
}

In [21]:
tile_polygons_dict = {"Tile_ID":list(tile_polygons.keys()),"Polygon":list(tile_polygons.values())}

In [22]:
df_tile_polygons = pd.DataFrame.from_dict(tile_polygons_dict)

In [23]:
df_tile_polygons[df_tile_polygons["Tile_ID"] == "33MVV"]

Unnamed: 0,Tile_ID,Polygon
283,33MVV,"[[15.087633291104165, -0.9932231519597364], [1..."


In [25]:
df_tile_polygons

Unnamed: 0,Tile_ID,Polygon
0,32MMD,"[[8.100490685394892, -1.8974285062763014], [8...."
1,32MME,"[[8.260416537224202, -0.9930685574564732], [8...."
2,32NNG,"[[8.999730861504322, 1.8098298376179167], [8.9..."
3,32NNH,"[[9.076459111593785, 2.714261301620153], [9.07..."
4,32NNJ,"[[9.275359188516656, 3.6191061888902563], [9.2..."
...,...,...
303,35NPB,"[[27.898993787527058, 1.8096057131920158], [27..."
304,35NNC,"[[26.999730687673807, 2.714184965229428], [26...."
305,35NPC,"[[27.899550592739754, 2.7138487059430134], [27..."
306,35NPD,"[[27.900331529723736, 3.618620781907565], [27...."


In [24]:
df_tile_polygons.to_csv("/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Geometry/tile_lists/shub_tile_list/tiles_polygons_full_v2.csv",index=False)

In [106]:
congo_basin_polygon =  [ [ [ 7.984666643405897, 4.815355316207002 ], 
     [ 7.58931630518685, 5.38007165110783 ], 
     [ 8.830437227602509, 6.79043633567108 ], 
     [ 11.344844023253321, 5.88973312574918 ], 
     [ 13.086055715022702, 6.459288352028883 ], 
     [ 14.713356361536142, 4.913352737841115 ], 
     [ 15.825846063036201, 4.92875495204759 ], 
     [ 20.392054192163812, 4.065945287129826 ], 
     [ 23.515743368893602, 6.112746427228712 ], 
     [ 30.271751714491138, 2.304938798701089 ], 
     [ 28.729612060844197, -4.391749834258915 ], 
     [ 25.247082155918701, -5.00021242727766 ], 
     [ 25.224878557828482, -3.093418809161228 ], 
     [ 24.233313983593739, -6.329050577716721 ], 
     [ 21.046773661925535, -6.730850974674368 ], 
     [ 19.220211818204795, -4.688071477466952 ], 
     [ 18.156368465602867, -3.527221937615936 ], 
     [ 14.339614843514214, -0.463710978407027 ], 
     [ 14.065691348308377, -1.245844321537668 ], 
     [ 14.085143295884563, -2.004470277008942 ], 
     [ 14.08479826967783, -3.811623581552471 ], 
     [ 12.742958913127694, -3.832953349170474 ], 
     [ 12.101044643113539, -3.191039079156319 ], 
     [ 12.295564118875404, -3.813501401594287 ], 
     [ 12.665389115203187, -4.121262889481581 ], 
     [ 13.764047274806716, -5.081793761883372 ], 
     [ 12.761977829944714, -6.138350729076138 ], 
     [ 8.385722656061915, -0.623381999099699 ], 
     [ 9.241608349414122, -0.253794995152155 ], 
     [ 9.241608349414122, 0.757706278809543 ], 
     [ 9.805714829123531, 3.208651673409044 ], 
     [ 9.047088873652257, 3.986729576456504 ], 
     [ 8.716405764857086, 4.434124370708794 ], 
     [ 8.334243029399895, 4.310494140285494 ], 
     [ 7.984666643405897, 4.815355316207002 ] ] ] 

In [14]:
congo_basin_bbox = [ [ [ 7.589582783192249, 6.932363144917524 ], 
                      [ 31.006899150428829, 6.724604951545336 ], 
                      [ 28.899637474796641, -7.135834520570612 ], 
                      [ 7.322465105999436, -6.1564030375303 ], 
                      [ 7.589582783192249, 6.932363144917524 ] ] ]

In [171]:
# Draw every tile footprint, the basin polygon and the bounding box on one map.
#
# NOTE: `Map` is reused from the kernel (the constructor call below is left
# disabled, as in the original cell), and `gmap` / `folium` are imported in the
# "Visualize Mosaic" cell, so that cell has to run before this one.
# Map = gmap.Map()
features = []
for index,tile,polygon in df_tile_polygons.itertuples():

    # create geometry object, create feature object, append to features list for feature collection creation
    # (this line was missing its leading '#', which made the whole cell a SyntaxError)
    polys = ee.Geometry.Polygon(polygon)
    feature = ee.Feature(polys,{"name":tile})
    features.append(feature)

# Layer 1: all tile footprints as a single feature collection.
fc = ee.FeatureCollection(features)
Map.add_layer(fc, {}, 'default display')


# Layer 2: the Congo Basin outline.
geo_obj_poly = ee.Geometry.Polygon(congo_basin_polygon)
Map.add_layer(geo_obj_poly, {}, 'Full Polygon')

# Layer 3: the bounding box, which is also what the view is centred on.
geo_obj_bbox = ee.Geometry.Polygon(congo_basin_bbox)
Map.centerObject(geo_obj_bbox,3)
Map.add_layer(geo_obj_bbox, {}, 'Bounding Box')
Map.add_child(folium.LayerControl())


Map

0 <class 'list'>
1 <class 'list'>
2 <class 'list'>
3 <class 'list'>
4 <class 'list'>
5 <class 'list'>
6 <class 'list'>
7 <class 'list'>
8 <class 'list'>
9 <class 'list'>
10 <class 'list'>
11 <class 'list'>
12 <class 'list'>
13 <class 'list'>
14 <class 'list'>
15 <class 'list'>
16 <class 'list'>
17 <class 'list'>
18 <class 'list'>
19 <class 'list'>
20 <class 'list'>
21 <class 'list'>
22 <class 'list'>
23 <class 'list'>
24 <class 'list'>
25 <class 'list'>
26 <class 'list'>
27 <class 'list'>
28 <class 'list'>
29 <class 'list'>
30 <class 'list'>
31 <class 'list'>
32 <class 'list'>
33 <class 'list'>
34 <class 'list'>
35 <class 'list'>
36 <class 'list'>
37 <class 'list'>
38 <class 'list'>
39 <class 'list'>
40 <class 'list'>
41 <class 'list'>
42 <class 'list'>
43 <class 'list'>
44 <class 'list'>
45 <class 'list'>
46 <class 'list'>
47 <class 'list'>
48 <class 'list'>
49 <class 'list'>
50 <class 'list'>
51 <class 'list'>
52 <class 'list'>
53 <class 'list'>
54 <class 'list'>
55 <class 'list'>
56

In [25]:
img = coll.first()

In [26]:
ee.Algorithms.GeometryConstructors.Polygon(
              ee.Geometry( img.get('system:footprint') ).coordinates()
              )

ee.Geometry({
  "functionInvocationValue": {
    "functionName": "GeometryConstructors.Polygon",
    "arguments": {
      "coordinates": {
        "functionInvocationValue": {
          "functionName": "Geometry.coordinates",
          "arguments": {
            "geometry": {
              "functionInvocationValue": {
                "functionName": "Element.get",
                "arguments": {
                  "object": {
                    "functionInvocationValue": {
                      "functionName": "Collection.first",
                      "arguments": {
                        "collection": {
                          "functionInvocationValue": {
                            "functionName": "Collection.filter",
                            "arguments": {
                              "collection": {
                                "functionInvocationValue": {
                                  "functionName": "Collection.filter",
                                  "arguments": 

In [177]:
coll.size().getInfo()

137

In [244]:
final_mosaic,filtered_mosaic = export_single_tile(
            tile=tile,
            main_collection=main_collection,
            main_polygon=main_polygon,
            sensor=sensor,
            sort_by='CLOUDY_PERCENTAGE'
        )

Inject B10
Making tile collection
Making mosaic


In [240]:
# final_mosaic2 = final_mosaic.select('TCI_R', 'TCI_G', 'TCI_B')

In [16]:
# imgC.first().getInfo()


image_coords = imgC.first().get('system:footprint').getInfo()["coordinates"]


NameError: name 'imgC' is not defined

In [15]:
ee.Geometry.Polygon(image_coords)

NameError: name 'image_coords' is not defined

In [290]:
def export_single_image(img=img,dest_path = 'S2_CloudFree/pipeline_v2_single_tile_test/',name = 'zhenya_tile_test', region = image_coords):
    """Start an Earth Engine export of one image to Cloud Storage.

    NOTE: the defaults for ``img`` and ``region`` are evaluated when this cell
    is executed, so the globals ``img`` and ``image_coords`` must exist at that
    moment; later reassignments of those globals do not change the defaults.

    Args:
        img: Image handed to the export as ``image``.
        dest_path: Prefix placed in front of the generated file name.
        name: Label embedded in the generated file name / task description.
        region: Export region, passed through unchanged.

    Returns:
        The export task returned by ``ee.batch.Export.image.toCloudStorage``,
        after ``start()`` has been called on it.
    """
    # time.ctime() looks like 'Sat Nov 14 22:29:01 2020'; drop the weekday and
    # glue the remaining pieces together with underscores.
    ctime_parts = time.ctime().split(" ")
    stamp = "_".join(ctime_parts[1:])
    task_label = "_".join(["0", name, stamp])

    task = ee.batch.Export.image.toCloudStorage(
        image=img,
        description=task_label,
        region=region,
        scale=10,
        fileNamePrefix=dest_path + task_label,
        bucket='project-canopy-temp-2',
        maxPixels=1e13,
    )
    task.start()
    return task

In [294]:
export = export_single_image(img=final_mosaic2)

In [295]:
# Poll the export task until it is no longer active. Without a pause this loop
# issues status requests back to back for the whole duration of the export, so
# wait a few seconds between polls.
while export.active():
    print(export.status(), end="\r", flush=True)
    time.sleep(10)

{'state': 'RUNNING', 'description': '0_zhenya_tile_test_Nov_14_22:29:01_2020', 'creation_timestamp_ms': 1605410941405, 'update_timestamp_ms': 1605411152960, 'start_timestamp_ms': 1605411023361, 'task_type': 'EXPORT_IMAGE', 'attempt': 1, 'id': 'A6444TKCRFTZGFUDT4A3KKE4', 'name': 'projects/earthengine-legacy/operations/A6444TKCRFTZGFUDT4A3KKE4'}

KeyboardInterrupt
2020-11-15T03:33:14Z


KeyboardInterrupt: 

# Visualize Mosaic

In [22]:
import folium
import geemap.eefolium as gmap

TCI_RGB = ['TCI_R', 'TCI_G', 'TCI_B']
vis = {'bands': TCI_RGB, "gamma": [2, 2, 2]}


def visualize_geo(coords):
    """Show a polygon on a new geemap folium map.

    Args:
        coords: Polygon coordinates, passed unchanged to ``ee.Geometry.Polygon``.

    Returns:
        The new ``gmap.Map`` with the polygon added as the 'default display'
        layer, centred on the polygon (zoom argument 3) and carrying a folium
        layer control.
    """
    canvas = gmap.Map()
    outline = ee.Geometry.Polygon(coords)
    canvas.centerObject(outline, 3)
    canvas.add_layer(outline, {}, 'default display')
    layer_toggle = folium.LayerControl()
    canvas.add_child(layer_toggle)
    return canvas

def visualize_raster(img):
    """Show an image on a new geemap folium map using the module-level ``vis``.

    Args:
        img: Object to display; the map is centred on it (zoom argument 10)
            and it is added as a layer with the ``vis`` parameters.

    Returns:
        The new ``gmap.Map`` with the layer and a folium layer control added.
    """
    canvas = gmap.Map()
    canvas.centerObject(img, 10)
    canvas.addLayer(img, vis)
    layer_toggle = folium.LayerControl()
    canvas.add_child(layer_toggle)
    return canvas
    
    

In [96]:
visualize_geo(Full_Basin)

In [15]:
congo_basin_bbox

[[[7.589582783192249, 6.932363144917524],
  [31.00689915042883, 6.724604951545336],
  [28.89963747479664, -7.135834520570612],
  [7.322465105999436, -6.1564030375303],
  [7.589582783192249, 6.932363144917524]]]

In [27]:
visualize_raster(exports[1])

# Sandbox

In [261]:
ee.Geometry(imgC.first().get('ROI'))

ee.Geometry({
  "functionInvocationValue": {
    "functionName": "Element.get",
    "arguments": {
      "object": {
        "functionInvocationValue": {
          "functionName": "Collection.first",
          "arguments": {
            "collection": {
              "functionInvocationValue": {
                "functionName": "Collection.limit",
                "arguments": {
                  "collection": {
                    "functionInvocationValue": {
                      "functionName": "Collection.filter",
                      "arguments": {
                        "collection": {
                          "functionInvocationValue": {
                            "functionName": "Collection.map",
                            "arguments": {
                              "baseAlgorithm": {
                                "functionDefinitionValue": {
                                  "argumentNames": [
                                    "_MAPPING_VAR_0_0"
                        

In [260]:
# imgC.first().geometry().toGeoJSON()


ee.Algorithms.GeometryConstructors.Polygon(
              ee.Geometry( imgC.first().get('system:footprint') ).coordinates()
              )

ee.Geometry({
  "functionInvocationValue": {
    "functionName": "GeometryConstructors.Polygon",
    "arguments": {
      "coordinates": {
        "functionInvocationValue": {
          "functionName": "Geometry.coordinates",
          "arguments": {
            "geometry": {
              "functionInvocationValue": {
                "functionName": "Element.get",
                "arguments": {
                  "object": {
                    "functionInvocationValue": {
                      "functionName": "Collection.first",
                      "arguments": {
                        "collection": {
                          "functionInvocationValue": {
                            "functionName": "Collection.limit",
                            "arguments": {
                              "collection": {
                                "functionInvocationValue": {
                                  "functionName": "Collection.filter",
                                  "arguments": {

In [34]:
type(coll)

ee.imagecollection.ImageCollection

In [35]:
type(coll.first())

ee.image.Image

In [37]:
coll2 = coll.map(inject_B10)

In [38]:
coll2.size().getInfo()

137

In [39]:
coll2.first().select(['B10']).getInfo()["id"]

'COPERNICUS/S2_SR/20190105T094401_20190105T095815_T32NMK'

In [50]:
# One output collection per modifier, in the same order as `modifiers`.
collections = []
modifiers = [inject_B10, sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

# modifiers = [sentinel2CloudScore]

for m in modifiers:
    # Each modifier is mapped over coll2 on its own: the result of one
    # modifier is NOT fed into the next, so collections[i] carries only
    # modifiers[i] on top of whatever coll2 already contains.
    # NOTE(review): a modifier that needs a band produced by an earlier one
    # (e.g. a 'cloudScore' band) will fail when evaluated -- confirm whether
    # chaining the modifiers was intended here.
    coll3 = coll2.map(m)
    collections.append(coll3)

In [51]:
for coll in collections:
    print(coll.first().select(['B1']).getInfo()["id"])

COPERNICUS/S2_SR/20190105T094401_20190105T095815_T32NMK
COPERNICUS/S2_SR/20190105T094401_20190105T095815_T32NMK


EEException: Collection.first: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.select: Pattern 'cloudScore' did not match any bands.

In [52]:
def calcCloudCoverage(img, cloudThresh=0.2):
    """Attach cloud-cover and coverage percentages to ``img`` as properties.

    The image must carry a 'cloudScore' band. This notebook variant uses the
    image's own geometry as the region of interest (instead of an 'ROI'
    property).

    Args:
        img: Image with a 'cloudScore' band and a 'system:footprint' property.
        cloudThresh: Pixels whose cloudScore is greater than this value are
            counted as cloud.

    Returns:
        The image with three properties set:
        'CLOUDY_PERCENTAGE' (cloud area / footprint polygon area * 100),
        'ROI_COVERAGE_PERCENT' (ROI-footprint intersection area / ROI area * 100)
        and 'CLOUDY_PERCENTAGE_ROI' (cloud area / ROI area * 100).
    """
    def _percentage(part, whole):
        # part / whole * 100, built from Earth Engine number operations
        return ee.Number(part).divide(whole).multiply(100)

    # Rebuild the footprint as a polygon from the 'system:footprint' coordinates.
    footprint_coords = ee.Geometry(img.get('system:footprint')).coordinates()
    imgPoly = ee.Algorithms.GeometryConstructors.Polygon(footprint_coords)

    # Region of interest: the image's own geometry.
    roi = img.geometry()
    overlap = roi.intersection(imgPoly, ee.ErrorMargin(0.5))

    # Threshold the cloud score, restrict it to the ROI and weight each
    # flagged pixel by its area.
    thresholded = img.select(['cloudScore']).gt(cloudThresh)
    cloudMask = thresholded.clip(roi).rename('cloudMask')
    cloud_area_img = cloudMask.multiply(ee.Image.pixelArea())

    # Sum the cloudy area over the ROI. bestEffort and tileScale are not in
    # the javascript version of this routine.
    totals = cloud_area_img.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=roi,
        scale=10,
        maxPixels=1e12,
        bestEffort=True,
        tileScale=16,
    )

    # The javascript version relies on the default error margin for .area();
    # here it is passed explicitly.
    area_tolerance = 10
    cloud_pct = _percentage(totals.get('cloudMask'), imgPoly.area(area_tolerance))
    coverage_pct = _percentage(overlap.area(area_tolerance), roi.area(area_tolerance))
    cloud_pct_roi = _percentage(totals.get('cloudMask'), roi.area(area_tolerance))

    return (
        img.set('CLOUDY_PERCENTAGE', cloud_pct)
        .set('ROI_COVERAGE_PERCENT', coverage_pct)
        .set('CLOUDY_PERCENTAGE_ROI', cloud_pct_roi)
    )

In [55]:
img = collections[1].first()

In [56]:
img2 = calcCloudCoverage(img)

In [57]:
img2.select(['B1']).getInfo()["id"]

'COPERNICUS/S2_SR/20190105T094401_20190105T095815_T32NMK'

In [58]:
collections[2].first().select(['B1']).getInfo()["id"]

EEException: Collection.first: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.select: Pattern 'cloudScore' did not match any bands.

In [43]:
modifiers = [calcCloudCoverage]

for m in modifiers:
    coll4 = coll3.map(m)

In [229]:
coll4.first().select(['B1']).getInfo()["id"]

EEException: Collection.first: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

In [187]:
coll2.first().select(['cloudMask']).getInfo()

EEException: Collection.first: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.select: Pattern 'cloudScore' did not match any bands.

In [179]:
coll3 = coll2.filter(ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', 10))

In [180]:
coll3.size().getInfo()

2

In [181]:
coll3.getInfo()["features"]

EEException: Error in map(ID=20200105T094309_20200105T095703_T32NMK):
Image.select: Pattern 'cloudScore' did not match any bands.

In [None]:
coll4 = coll3.sort()

In [61]:
ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', 10).getInfo()

{'type': 'Filter.not',
 'filter': {'type': 'Filter.lt',
  'leftValue': 10,
  'rightField': 'CLOUDY_PIXEL_PERCENTAGE'}}

In [79]:
assert ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', 10) == makeFilterList(sensor)[1][0]

AssertionError: 

In [85]:
modifiers = [inject_B10, sentinel2CloudScore, calcCloudCoverage, sentinel2ProjectShadows, computeQualityScore]

coll4 = makeTileCollection(sensor, tile, main_collection, modifiers)

In [86]:
coll4.size().getInfo()

2

In [69]:
collections = process_datasource_tiles(source, sensor, export_folder, tile_list)

3 tiles have been loaded
Processing tile T32NMK
processing  1_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection
Processing tile T32NML
processing  2_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection
Processing tile T32NMM
processing  3_pipeline_v2_test_1_Nov_13_14:23:55_2020
Inject B10
Making tile collection


  with loop.timer(seconds, ref=ref) as t:


In [70]:
collections[0].getInfo()

137

In [71]:
collections

[<ee.imagecollection.ImageCollection at 0x1218b9f11c0>,
 <ee.imagecollection.ImageCollection at 0x1218ba38700>,
 <ee.imagecollection.ImageCollection at 0x1218ba4acd0>]

In [72]:
coll = collections[0]

coll.getInfo()

EEException: Error in map(ID=20190105T094401_20190105T095815_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

In [73]:
def makeTileCollection(sensor, tile, main_collection, modifiers=None):
    """Build the image collection for one tile, printing its size at each stage.

    This is an instrumented variant for debugging: every ``print`` evaluates
    ``collection.size().getInfo()``, which is a blocking Earth Engine request.

    Args:
        sensor: Sensor configuration, passed to ``makeFilterList``.
        tile: Tile name; images are kept when their 'system:index' contains it.
        main_collection: Collection to start from.
        modifiers: Optional iterable of functions mapped over the collection
            one after another (each sees the output of the previous one).
            ``None`` (the default) or an empty sequence means no modifiers.

    Returns:
        The filtered and modified collection.
    """
    filters_before, filters_after = makeFilterList(sensor)

    print('1:', main_collection.size().getInfo())

    collection = main_collection.filterMetadata('system:index', 'contains', tile)

    print('2:', collection.size().getInfo())

    if filters_before is not None:
        collection = collection.filter( filters_before )

    # `modifiers or ()` covers both None and an empty list; the default is
    # None rather than a shared mutable list.
    for m in modifiers or ():
        collection = collection.map(m)
        print(f'3 {m}:', collection.size().getInfo())

    if filters_after:
        collection = collection.filter( filters_after )
        print('4:', collection.size().getInfo())

    return collection

In [74]:
main_collection = ee.ImageCollection(sensor['name']) \
                  .filterDate(ee.Date('2020-01-01'), ee.Date('2020-01-07'))

In [75]:
coll = makeTileCollection(sensor, tile, main_collection, modifiers)

1: 50035
2: 1
3 <function inject_B10 at 0x000001218B1C0700>: 1
3 <function sentinel2CloudScore at 0x000001218B1ABD30>: 1
3 <function calcCloudCoverage at 0x000001218B1C0550>: 1
3 <function sentinel2ProjectShadows at 0x000001218B1C09D0>: 1
3 <function computeQualityScore at 0x000001218B1C0790>: 1


EEException: Collection.reduceColumns: Error in map(ID=20200105T094309_20200105T095703_T32NMK):
Image.clip: The area for image clipping must be a geometry, a Feature or a FeatureCollection.

  with loop.timer(seconds, ref=ref) as t:


# Time Date: Per-Polygon Date Windows

In [1]:
from pandas.tseries.offsets import DateOffset
import ee
import yaml
import time
import os
import json
import pandas as pd
from utils import exportImageToGDrive,exportImageToGCS
from download_sen12 import *

ee.Initialize()

In [2]:
def import_aois(csv_loc, Full_Congo_Pull=False, start_date=None,
                end_date=None, days_duration=90, poly_start=0, poly_limit=None):
    """Build an Earth Engine FeatureCollection of areas of interest from a CSV.

    Two CSV layouts are handled:

    * ``Full_Congo_Pull=True``: a tile list with exactly two columns, the tile
      name and a JSON-encoded coordinate list. One feature is created per row
      and named after the tile.
    * ``Full_Congo_Pull=False``: a label sheet containing the columns
      "center-lat", "center-long", "polygon", "Labels combined", "tile date"
      and "area (km2)". "polygon" holds JSON with a "coordinates" entry. One
      feature is created per row, named with a running number that starts at
      ``poly_start + 1``, and a date window centred on "tile date" is computed
      for each row.

    Args:
        csv_loc: Path of the CSV file.
        Full_Congo_Pull: Selects the tile-list layout when true.
        start_date: Accepted for backward compatibility; currently ignored.
        end_date: Accepted for backward compatibility; currently ignored.
        days_duration: Total length of each date window in days; half of it is
            placed on either side of the row's "tile date".
        poly_start: Number of leading data rows to skip (label layout only).
        poly_limit: Maximum number of rows to read after skipping; a falsy
            value reads all remaining rows (label layout only).

    Returns:
        A tuple ``(feature_collection, start_end_list, df_labels)``.
        ``start_end_list`` holds one dict per label row with the keys
        'start_date', 'end_date' (both 'YYYY-MM-DD' strings), 'original_date',
        'day_offset' and 'area'. In the tile-list layout ``start_end_list`` is
        empty and ``df_labels`` is ``None``; previously that path raised
        UnboundLocalError because ``df_labels`` was never assigned.
    """
    features = []
    start_end_list = []
    day_offset = days_duration / 2

    if Full_Congo_Pull:
        df_congo_tile_list = pd.read_csv(csv_loc)
        # itertuples() yields (index, *columns); the three-name unpacking is
        # what requires the file to have exactly two columns.
        for index, tile, polygon in df_congo_tile_list.itertuples():
            poly_obj = ee.Geometry.Polygon(json.loads(polygon))
            features.append(ee.Feature(poly_obj, {"name": tile}))
        return ee.FeatureCollection(features), start_end_list, None

    # skiprows starts at 1 so that the header (row 0) is always kept.
    read_options = {"skiprows": range(1, poly_start + 1)}
    if poly_limit:
        read_options["nrows"] = poly_limit
    df_labels = pd.read_csv(csv_loc, **read_options)

    # .copy() so that the column assignment below works on an independent
    # frame instead of a slice of the frame that was read.
    df_labels = df_labels[["center-lat", "center-long", "polygon",
                           "Labels combined", "tile date", "area (km2)"]].copy()
    df_labels["tile date"] = pd.to_datetime(df_labels["tile date"])
    window_start = df_labels["tile date"] + DateOffset(days=-day_offset)
    window_end = df_labels["tile date"] + DateOffset(days=day_offset)

    for row in range(len(df_labels)):
        start_end_list.append({
            # str(Timestamp) starts with 'YYYY-MM-DD'; keep just the date part
            'start_date': str(window_start.iloc[row])[:10],
            'end_date': str(window_end.iloc[row])[:10],
            'original_date': df_labels["tile date"].iloc[row],
            'day_offset': day_offset,
            'area': df_labels["area (km2)"].iloc[row],
        })

    # Feature names continue the numbering of the skipped rows.
    for feature_id, polygon in enumerate(df_labels["polygon"], start=poly_start + 1):
        coordinates = json.loads(polygon)["coordinates"]
        features.append(ee.Feature(ee.Geometry.Polygon(coordinates), {"name": feature_id}))

    return ee.FeatureCollection(features), start_end_list, df_labels


# ### CHANGE BELOW PATH ###
zhenya_label_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v3/Polygon_List/polygons_101320.csv"
# david__label_path = 'D:/canopy_data/csvs/polygons_101320.csv'
# zhenya_tilelist_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Geometry/tile_lists/shub_tile_list/tiles_polygons_full.csv"

In [3]:
# Load the pipeline configuration and use its first data source and first
# sensor entry for this run.
config_dict = load_config('config.yml')
source = config_dict['data_list'][0]
sensor = config_dict['sensors'][0]
# export_folder = config_dict['drive_folder']
# The export destination is taken from the 'bucket' entry; the 'drive_folder'
# alternative above is left disabled.
export_folder = config_dict['bucket']

print(export_folder)

project-canopy-temp-2


  return yaml.load(stream)


In [4]:
fc,date_range_list,df_labels = import_aois(zhenya_label_path)

In [5]:
exports = process_datasource_custom_daterange(source,sensor, export_folder, fc, date_range_list)

101 features have been loaded
processing polygon 1 of 101
processing polygon 2 of 101
processing polygon 3 of 101
processing polygon 4 of 101
processing polygon 5 of 101
processing polygon 6 of 101
processing polygon 7 of 101
processing polygon 8 of 101
processing polygon 9 of 101
processing polygon 10 of 101
processing polygon 11 of 101
processing polygon 12 of 101
processing polygon 13 of 101
processing polygon 14 of 101
processing polygon 15 of 101
processing polygon 16 of 101
processing polygon 17 of 101
processing polygon 18 of 101
processing polygon 19 of 101
processing polygon 20 of 101
processing polygon 21 of 101
processing polygon 22 of 101
processing polygon 23 of 101
processing polygon 24 of 101
processing polygon 25 of 101
processing polygon 26 of 101
processing polygon 27 of 101
processing polygon 28 of 101
processing polygon 29 of 101
processing polygon 30 of 101
processing polygon 31 of 101
processing polygon 32 of 101
processing polygon 33 of 101
processing polygon 34 

KeyboardInterrupt
2020-12-08T23:41:35Z


KeyboardInterrupt: 

In [11]:
exports[0].status()

{'state': 'READY',
 'description': '1_dynamic_date_range_v4_secondary_sort_using_area_full_band_output_Dec__8_14:50:53_2020',
 'creation_timestamp_ms': 1607457056685,
 'update_timestamp_ms': 1607457056685,
 'start_timestamp_ms': 0,
 'task_type': 'EXPORT_IMAGE',
 'id': 'F3NRUJSEATYSQGZ67PFLBS5F',
 'name': 'projects/earthengine-legacy/operations/F3NRUJSEATYSQGZ67PFLBS5F'}

In [9]:
exports[0][0].size().getInfo()

13

In [13]:
exports[0][0].first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'B1',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [295, 264],
   'origin': [164, 897],
   'crs': 'EPSG:32632',
   'crs_transform': [60, 0, 499980, 0, -60, 700020]},
  {'id': 'B2',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B3',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B4',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020

In [14]:
exports[0][1].first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'B1',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [295, 264],
   'origin': [164, 897],
   'crs': 'EPSG:32632',
   'crs_transform': [60, 0, 499980, 0, -60, 700020]},
  {'id': 'B2',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B3',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B4',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1767, 1580],
   'origin': [984, 5384],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020

In [11]:
# def image_collection_secondary_sort(col,primary_sort = 'CLOUDY_PERCENTAGE',secondary_sort = 'CLOUDY_PIXEL_PERCENTAGE'):
    
#     img_objs = []
    
#     primary_list = col.aggregate_array(primary_sort)
#     secondary_list = col.aggregate_array(secondary_sort)
#     image_id_list = col.aggregate_array('system:index')

    
    
#     sort_dic = \
#     {primary_sort:primary_list,
#      secondary_sort:secondary_list,
#      "id":image_id_list}
    

#     new_sort_dic = {}
#     for key in sort_dic:
#         new_sort_dic[key] = sort_dic[key].getInfo()
        
# #     df = pd.DataFrame(new_sort_dic)
    
#     return new_sort_dic
    
# #     df = df.sort_values(by=[primary_sort,secondary_sort])
    
# #     df = df.reset_index(drop=True)
    
    
# #     for image_id in df["id"]:
# #         img = ee.Image('COPERNICUS/S2_SR/' + image_id)
# #         img_objs.append(img)
        
# #     return ee.ImageCollection(img_objs)

In [11]:
df = image_collection_secondary_sort(exports[0][0])

In [12]:
df

Unnamed: 0,CLOUDY_PERCENTAGE,CLOUDY_PIXEL_PERCENTAGE,id
0,0.0,1.654612,20191216T094319_20191216T095042_T32NNM
1,0.0,0.002038,20191226T094319_20191226T095118_T32NNM
2,0.0,0.577385,20191231T094411_20191231T095441_T32NNM
3,0.0,1.778067,20200105T094309_20200105T095703_T32NNM
4,0.403145,6.665471,20200110T094351_20200110T095439_T32NNM
5,0.0,1.54461,20200120T094321_20200120T095439_T32NNM
6,0.0,0.002615,20200125T094159_20200125T095047_T32NNM
7,0.055692,5.425963,20200130T094231_20200130T095438_T32NNM
8,0.0,5.801137,20200204T094109_20200204T095417_T32NNM
9,0.0,4.58876,20200209T094131_20200209T095437_T32NNM


In [21]:
dic = image_collection_secondary_sort(exports[0][1])

In [22]:
dic

{'CLOUDY_PERCENTAGE': [],
 'CLOUDY_PIXEL_PERCENTAGE': [0.002038,
  0.002615,
  0.577385,
  1.54461,
  1.654612,
  1.778067,
  4.58876,
  5.801137,
  2.348498,
  5.425963,
  6.665471,
  2.256456,
  1.784462],
 'id': ['20191226T094319_20191226T095118_T32NNM',
  '20200125T094159_20200125T095047_T32NNM',
  '20191231T094411_20191231T095441_T32NNM',
  '20200120T094321_20200120T095439_T32NNM',
  '20191216T094319_20191216T095042_T32NNM',
  '20200105T094309_20200105T095703_T32NNM',
  '20200209T094131_20200209T095437_T32NNM',
  '20200204T094109_20200204T095417_T32NNM',
  '20200214T094029_20200214T094641_T32NNM',
  '20200130T094231_20200130T095438_T32NNM',
  '20200110T094351_20200110T095439_T32NNM',
  '20200219T094031_20200219T095439_T32NNM',
  '20200224T094029_20200224T095355_T32NNM']}

In [25]:
exports[0][1].first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'B1',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1830, 1830],
   'crs': 'EPSG:32632',
   'crs_transform': [60, 0, 499980, 0, -60, 700020]},
  {'id': 'B2',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B3',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B4',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32632',
   'crs_transform': [10, 0, 499980, 0, -10, 700020]},
  {'id': 'B5',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,


# Sandbox #2 - Outdated

In [9]:
def import_aois(csv_loc,Full_Congo_Pull = False,start_date=None,end_date=None,days_duration=90, poly_limit=None):
    """Outdated sandbox variant: return a plain list of ``ee.Feature`` objects.

    Args:
        csv_loc: Path of the CSV file.
        Full_Congo_Pull: When true, the CSV is a two-column tile list (tile
            name, JSON coordinate list) and features are named after the tile.
            Otherwise it is a label sheet with the columns "center-lat",
            "center-long", "polygon", "Labels combined" and "tile date", and
            features are numbered from 1.
        start_date: Not used.
        end_date: Not used.
        days_duration: Length in days of the date window computed per label row.
        poly_limit: When truthy, only the first ``poly_limit`` polygons become
            features.

    Returns:
        A list of features. The per-row date windows are computed but not
        returned (the original return of a FeatureCollection plus the date
        list is disabled in this variant).
    """
    half_window = days_duration / 2

    if Full_Congo_Pull:
        tile_table = pd.read_csv(csv_loc)
        return [
            ee.Feature(ee.Geometry.Polygon(json.loads(ring_text)), {"name": tile_name})
            for _, tile_name, ring_text in tile_table.itertuples()
        ]

    label_table = pd.read_csv(csv_loc)
    label_table = label_table[["center-lat","center-long","polygon","Labels combined","tile date"]]
    label_table["tile date"] = pd.to_datetime(label_table["tile date"])
    window_open = label_table["tile date"] + DateOffset(days=-half_window)
    window_close = label_table["tile date"] + DateOffset(days=half_window)

    # Built for every row but never returned by this variant; kept so the
    # same column accesses and date parsing still take place.
    date_windows = [
        {
            'start_date': str(window_open[row])[:10],
            'end_date': str(window_close[row])[:10],
            'original_date': label_table.loc[row, 'tile date'],
            'day_offset': half_window
        }
        for row in range(len(window_open))
    ]

    rings = [json.loads(text)["coordinates"] for text in label_table["polygon"]]
    if poly_limit:
        rings = rings[0:poly_limit]

    # Features are numbered 1, 2, 3, ... in file order.
    return [
        ee.Feature(ee.Geometry.Polygon(ring), {"name": number})
        for number, ring in enumerate(rings, start=1)
    ]
    # (disabled alternative: return ee.FeatureCollection(features) together with the date windows)



# ### CHANGE BELOW PATH ###
zhenya_label_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v3/Polygon_List/polygons_101320.csv"
david__label_path = 'D:/canopy_data/csvs/polygons_101320.csv'
#zhenya_tilelist_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Geometry/tile_lists/shub_tile_list/tiles_polygons_full.csv"

In [10]:
# Load the pipeline configuration and use its first data source and first
# sensor entry.
config_dict = load_config('config.yml')
source = config_dict['data_list'][0]
sensor = config_dict['sensors'][0]
# export_folder = config_dict['drive_folder']
# The export destination is taken from the 'bucket' entry; the 'drive_folder'
# alternative above is left disabled.
export_folder = config_dict['bucket']

print(export_folder)

project-canopy-temp-2


  return yaml.load(stream)


In [11]:
features = import_aois(zhenya_label_path,days_duration=90,poly_limit=None)

In [18]:
features[80].getInfo()

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[20.163345, 1.098565],
    [20.23613, 0.580206],
    [20.904236, 0.307616],
    [21.175461, 0.420223],
    [20.736008, 1.023047],
    [20.163345, 1.098565]]]},
 'properties': {'name': 81}}

In [6]:
exports = process_datasource_custom_daterange(source,sensor, export_folder, fc, date_range_list)

101 features have been loaded
processing  1_dynamic_date_range_v3_secondary_sort_Dec__1_13:26:54_2020
13
processing  2_dynamic_date_range_v3_secondary_sort_Dec__1_13:26:55_2020
13
processing  3_dynamic_date_range_v3_secondary_sort_Dec__1_13:26:55_2020
36
processing  4_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:00_2020
31
processing  5_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:04_2020
36
processing  6_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:06_2020
6
processing  7_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:08_2020
52
processing  8_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:10_2020
10
processing  9_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:11_2020
27
processing  10_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:14_2020
40
processing  11_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:18_2020
14
processing  12_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:19_2020
24
processing  13_dynamic_date_range_v3_secondary_sort_Dec__1_13:27:20_

KeyboardInterrupt
2020-12-01T18:34:03Z


KeyboardInterrupt: 