The following script was written to export band data from a Landsat 8 image taken over BC, Canada.

The exported data will be used in other scripts to train a machine learning model that predicts Landsat 8 Band 4 from the measurements of the other bands:

`landsat8_B4_skl_prediction.ipynb`

`landsat8_B4_skl_keras_prediction.ipynb`

A smaller sample region in BC and another in the Sahara desert will also be exported, to serve as test cases for the trained models.

In [1]:
from pprint import pprint       # Function for better formatted printing to console
import ee                       # Google Earth Engine
import folium                   # Visualization library for GIS data and maps
import tensorflow as tf         # Machine learning platform (includes high-level API keras as tf.keras)
import json                     # JSON file manipulation library
import time      

In [2]:
# Print the version string of the imported TensorFlow package.
print(tf.__version__)

2.1.0


In [3]:
# Initialize the Earth Engine client before any other ee call in this notebook.
ee.Initialize()                 # Necessary to read credentials from credentials.json and authenticate future commands

In [4]:
# Use these bands for prediction: B2 through B7, built from the band numbers.
bands = ['B' + str(bandNumber) for bandNumber in range(2, 8)]
# Use Landsat 8 surface reflectance data. Image collection from Earth Engine libraries
# NOTE(review): 'LANDSAT/LC08/C01/T1_SR' looks like a Collection 1 asset ID, which
# may have been superseded by Collection 2 in the Earth Engine catalog - confirm it
# is still served. Switching collections would also mean revisiting the 'pixel_qa'
# bit mask and the divide(10000) scaling used in maskL8sr.
l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')

# Cloud masking function.
def maskL8sr(image):
  """Mask cloud and cloud-shadow pixels, then keep and rescale the model bands.

  Args:
    image: An Earth Engine image providing a 'pixel_qa' band and every band
      listed in the module-level `bands`.

  Returns:
    The image with flagged pixels masked out, restricted to `bands`, and with
    every value divided by 10000.
  """
  pixelQa = image.select('pixel_qa')

  # Bit 3 of pixel_qa flags cloud shadow and bit 5 flags cloud; a pixel is
  # kept only when both bits are zero.
  shadowBit = ee.Number(2).pow(3).int()
  cloudBit = ee.Number(2).pow(5).int()
  shadowFree = pixelQa.bitwiseAnd(shadowBit).eq(0)
  cloudFree = pixelQa.bitwiseAnd(cloudBit).eq(0)
  clearMask = shadowFree.And(cloudFree)

  maskedImage = image.updateMask(clearMask)
  return maskedImage.select(bands).divide(10000)

# The image input data is a 2018 cloud-masked median composite.
# filterDate treats its end date as exclusive, so the range ends on 2019-01-01
# to keep scenes acquired on 2018-12-31 in the composite.
image = l8sr.filterDate('2018-01-01', '2019-01-01').map(maskL8sr).median()

# Use folium to visualize the imagery.
# Request map tiles for a three-band rendering of the composite (B4, B3, B2),
# stretched between 0 and 0.3.
mapIdDict = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})

# The map object is deliberately not called `map`: that name would shadow the
# builtin map() for every later cell run in the same kernel.
# NOTE(review): the view is centred on [45.4, -75.7], whereas the regions
# sampled later are in BC and the Sahara - confirm this is the intended view.
compositeMap = folium.Map(location=[45.4, -75.7])
folium.TileLayer(
    tiles=mapIdDict['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='median composite',
  ).add_to(compositeMap)
compositeMap.add_child(folium.LayerControl())

# Last expression of the cell, so the notebook renders the map.
compositeMap

In [6]:
# Sample band data from a rectangular region in BC at a scale of 10000m
# (larger pixel scale).
sampleRegion = ee.Geometry.Rectangle([-139, 48.5, -114, 60])
sample = image.sample(region=sampleRegion, scale=10000)

# Add the 'random' column that the split below filters on.
sample = sample.randomColumn()

# Partition the sample approximately 70-30: values below 0.7 go to training,
# the remainder to testing.
testing = sample.filter(ee.Filter.gte('random', 0.7))
training = sample.filter(ee.Filter.lt('random', 0.7))

# List of all the properties we want to export (an independent copy of bands).
featureNames = bands[:]

In [9]:
# Create the export tasks for the testing data: one CSV and one TFRecord, both
# written to the same Google Drive folder. Only the file format differs, so the
# shared arguments are defined once.
testingExportArgs = {
  'collection': testing,
  'description': 'Testing Export',
  'folder': 'TFData',
  'selectors': featureNames,
}

testingTaskCSV = ee.batch.Export.table.toDrive(
  fileFormat='csv', **testingExportArgs)

testingTaskTFRecord = ee.batch.Export.table.toDrive(
  fileFormat='TFRecord', **testingExportArgs)

In [None]:
# Submit both testing exports (CSV first, then TFRecord) to Earth Engine.
for exportTask in (testingTaskCSV, testingTaskTFRecord):
  exportTask.start()

In [None]:
# Wait for both testing exports to finish. The loop keeps polling while either
# task is still active, so a CSV export that outlives the TFRecord export is
# not reported as done prematurely. Each pass prints the status of both tasks
# and then pauses for 5 seconds.
while testingTaskCSV.active() or testingTaskTFRecord.active():
    print(ee.data.getTaskStatus(testingTaskCSV.id))
    print(ee.data.getTaskStatus(testingTaskTFRecord.id))
    time.sleep(5)
print('Done with testing export.')

In [None]:
# Fetch the list of all Earth Engine tasks and print it, so the state of the
# export tasks can be checked by eye.
allTasks = ee.batch.Task.list()
print(allTasks)

In [10]:
# Create the export tasks for the training data: one CSV and one TFRecord, both
# written to the same Google Drive folder. Only the file format differs, so the
# shared arguments are defined once.
trainingExportArgs = {
  'collection': training,
  'description': 'Training Export',
  'folder': 'TFData',
  'selectors': featureNames,
}

trainingTaskCSV = ee.batch.Export.table.toDrive(
  fileFormat='csv', **trainingExportArgs)

trainingTaskTFRecord = ee.batch.Export.table.toDrive(
  fileFormat='TFRecord', **trainingExportArgs)

In [None]:
# Submit both training exports (CSV first, then TFRecord) to Earth Engine.
for exportTask in (trainingTaskCSV, trainingTaskTFRecord):
  exportTask.start()

In [None]:
# Wait for both training exports to finish. The loop keeps polling while either
# task is still active, so a CSV export that outlives the TFRecord export is
# not reported as done prematurely. Each pass prints the status of both tasks
# and then pauses for 5 seconds.
while trainingTaskCSV.active() or trainingTaskTFRecord.active():
  print(ee.data.getTaskStatus(trainingTaskCSV.id))
  print(ee.data.getTaskStatus(trainingTaskTFRecord.id))
  time.sleep(5)
print('Done with training export.')

In [None]:
# Fetch the list of all Earth Engine tasks and print it, so the state of the
# export tasks can be checked by eye.
allTasks = ee.batch.Task.list()
print(allTasks)

In [11]:
# Two rectangular export regions, one in BC and one in the Sahara, for a
# machine learning model in another script to predict on.
bcBounds = [-122, 58, -121, 59]
saharaBounds = [13, 24, 14, 25]
exportRegionBC = ee.Geometry.Rectangle(bcBounds)
exportRegionSahara = ee.Geometry.Rectangle(saharaBounds)

# Sample the bands in each region at a scale of 100m, which is relatively
# dense sampling.
denseScale = 100
exportBC = image.sample(region=exportRegionBC, scale=denseScale)
exportSahara = image.sample(region=exportRegionSahara, scale=denseScale)

# Set up the export tasks. Both regions are exported as CSVs to the same Google
# Drive folder that the training and testing data were exported to earlier, so
# the arguments they share are defined once.
imageExportArgs = {
  'folder': 'TFData',
  'fileFormat': 'csv',
  'selectors': featureNames,
}

BCimageTaskCSV = ee.batch.Export.table.toDrive(
  collection=exportBC,
  description='BC Image Export',
  **imageExportArgs
)

SaharaImageTaskCSV = ee.batch.Export.table.toDrive(
  collection=exportSahara,
  description='Sahara Image Export',
  **imageExportArgs
)

In [12]:
# Submit both region exports (BC first, then Sahara) to Earth Engine.
for exportTask in (BCimageTaskCSV, SaharaImageTaskCSV):
  exportTask.start()

In [None]:
# Wait for both region exports to finish. The tasks created for these regions
# are BCimageTaskCSV and SaharaImageTaskCSV, so those are the ones polled here.
# The loop keeps polling while either task is still active; each pass prints
# the status of both tasks and then pauses for 5 seconds.
while BCimageTaskCSV.active() or SaharaImageTaskCSV.active():
  print(ee.data.getTaskStatus(BCimageTaskCSV.id))
  print(ee.data.getTaskStatus(SaharaImageTaskCSV.id))
  time.sleep(5)
print('Done with image export.')

In [7]:
# Fetch the list of all Earth Engine tasks and print it, so the state of the
# export tasks can be checked by eye.
allTasks = ee.batch.Task.list()
print(allTasks)

[<Task EXPORT_FEATURES: Sahara Image Export (COMPLETED)>, <Task EXPORT_FEATURES: BC Image Export (COMPLETED)>, <Task EXPORT_IMAGE: Sahara Image Export (FAILED)>, <Task EXPORT_IMAGE: BC Image Export (COMPLETED)>, <Task EXPORT_IMAGE: Image Export (FAILED)>, <Task EXPORT_IMAGE: Image Export (CANCELLED)>, <Task EXPORT_IMAGE: Image Export (FAILED)>, <Task EXPORT_IMAGE: Image Export (CANCELLED)>, <Task EXPORT_IMAGE: Image Export (FAILED)>, <Task EXPORT_IMAGE: Image Export (COMPLETED)>, <Task EXPORT_FEATURES: Training Export (COMPLETED)>, <Task EXPORT_FEATURES: Training Export (COMPLETED)>, <Task EXPORT_FEATURES: Testing Export (COMPLETED)>, <Task EXPORT_FEATURES: Testing Export (COMPLETED)>, <Task EXPORT_FEATURES: Training Export (COMPLETED)>, <Task EXPORT_FEATURES: Testing Export (COMPLETED)>, <Task EXPORT_FEATURES: Testing Export (FAILED)>, <Task EXPORT_IMAGE: Image Export (COMPLETED)>, <Task EXPORT_FEATURES: Testing Export (FAILED)>, <Task EXPORT_FEATURES: Testing Export (FAILED)>, <Task 