# Exporting Data from Google Earth Engine as TensorFlow Records

This notebook is still a **work in progress**.

# 1. Config

In [45]:
import ee
import folium
import geopandas as gpd
import tensorflow as tf

In [46]:
# Helper for drawing Earth Engine objects as tile overlays on a Folium map.
def add_ee_layer(self, ee_image_object, vis_params, name):
    """Add an Earth Engine object to this Folium map as a toggleable overlay.

    Args:
        self: The folium map the layer is added to.
        ee_image_object: Any Earth Engine object exposing ``getMapId``.
        vis_params: Visualisation parameters forwarded to ``getMapId``.
        name: Label shown for the layer in the map's layer control.
    """
    tile_url = ee_image_object.getMapId(vis_params)['tile_fetcher'].url_format
    overlay_layer = folium.raster_layers.TileLayer(
        tiles=tile_url,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        name=name,
        overlay=True,
        control=True,
    )
    overlay_layer.add_to(self)

# Attach the helper to folium.Map so it can be called as Map.add_ee_layer(...).
folium.Map.add_ee_layer = add_ee_layer

In [47]:
# Initialise the Earth Engine client library for this session.
# NOTE(review): presumably requires credentials to be set up beforehand
# (e.g. via ee.Authenticate()) — confirm for the target environment.
ee.Initialize()

In [59]:
# Predictor band names, the label band name, and the combined feature list.
BANDS = ["b1", "CANNY"]
RESPONSE = "FAULTLINE"
FEATURES = [*BANDS, RESPONSE]

# Side length (in pixels) of each square patch; 256 is the alternative that was considered.
KERNEL_SIZE = 64 #256

# One fixed-length float32 parsing spec per feature, each KERNEL_SIZE x KERNEL_SIZE.
COLUMNS = [
    tf.io.FixedLenFeature(shape=[KERNEL_SIZE, KERNEL_SIZE], dtype=tf.float32)
    for _feature_name in FEATURES
]

# Feature name -> parsing spec, paired by position.
FEATURES_DICT = {
    feature_name: column_spec
    for feature_name, column_spec in zip(FEATURES, COLUMNS)
}

# 2. Data

In [49]:
# Earth Engine asset ids for the pre-rasterised fault labels and the THEMIS sample image.
FAULTS_RASTER_ASSET_ID = "projects/esg-satelite/assets/mars/labels/faults/post/faults_raster"
THEMIS_SAMPLE_ASSET_ID = "projects/esg-satelite/assets/mars/features/themis_epsg3857_sample/pre/themis_epsg3857_sample"

# Load both assets as images.
# NOTE(review): faultlines_raster is assigned again by the next cell (rasterised from the
# fault vectors) before it is used anywhere, so this first assignment looks redundant — confirm.
faultlines_raster = ee.Image(FAULTS_RASTER_ASSET_ID)
themis_tempe_terra = ee.Image(THEMIS_SAMPLE_ASSET_ID)

In [58]:
# Load the fault-line vectors from Earth Engine.
faultline_vectors = ee.FeatureCollection("projects/esg-satelite/assets/mars/labels/faults/pre/faults")


def add_column(feature):
    """Return the feature with a "Value" property set to 1.

    Every fault geometry carries the value 1; everything outside the
    geometries has no value until the raster is unmasked below.
    """
    return feature.set({"Value": 1})


# Tag every fault with Value = 1.
faultline_vectors_with_ones = faultline_vectors.map(add_column)

# Rasterise the "Value" property; reduceToImage names the output band "first".
_faults_first_band = faultline_vectors_with_ones.reduceToImage(
    properties=["Value"],
    reducer=ee.Reducer.first(),
)

# Rename the band "first" to "FAULTLINE", then unmask so that all
# non-fault pixels take the value 0.
faultlines_raster = _faults_first_band.select(["first"], ["FAULTLINE"]).unmask(0)

In [60]:
# Run Canny edge detection on the THEMIS image to obtain an additional input layer,
# then rename its single band from "b1" to "CANNY".
_canny_edges = ee.Algorithms.CannyEdgeDetector(
    image=themis_tempe_terra,
    threshold=200,
    sigma=1,
)
canny = _canny_edges.select(["b1"], ["CANNY"])

In [61]:
# Sanity check: fetch the band names of the Canny image (getInfo() pulls them client-side).
canny.bandNames().getInfo()

['CANNY']

In [62]:
# Combine the label raster, the Canny edges and the THEMIS image into one multi-band image.
_stack_layers = [faultlines_raster, canny, themis_tempe_terra]
image_stack = ee.Image.cat(_stack_layers)

In [63]:
# Cast every band of the stack to float so that all bands share a single dtype.
# This lines up with the tf.float32 parsing spec declared in COLUMNS (Config section);
# presumably that is the reason for the cast — TODO confirm.
image_stack = image_stack.float()

In [64]:
# Build a KERNEL_SIZE x KERNEL_SIZE fixed kernel filled with 1s.
# neighborhoodToArray uses it to attach, to every pixel, the
# KERNEL_SIZE x KERNEL_SIZE neighbourhood of values around that pixel.
# The weights are all 1 so that no transformation is applied to the values
# (NOTE(review): presumably only the kernel's shape matters here — confirm against the API docs).
ee_list = ee.List.repeat(1, KERNEL_SIZE)
ee_lists = ee.List.repeat(ee_list, KERNEL_SIZE)
kernel = ee.Kernel.fixed(width=KERNEL_SIZE, height=KERNEL_SIZE, weights=ee_lists)

image_stack_neighbours = image_stack.neighborhoodToArray(kernel=kernel)

In [65]:
# GeoJSON FeatureCollection of "patches": polygon regions that each delimit a
# different set of data (for example a train, a validation and a test patch).
# Feature order matters: the sampling cell further down labels the features
# by position as "train", "val", "test".
def _polygon_feature(ring):
    """Wrap one closed ring of coordinate pairs in a property-less GeoJSON Polygon feature."""
    return {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "coordinates": [ring],
            "type": "Polygon",
        },
    }


# One closed ring per patch; the first and last vertex of each ring coincide.
_PATCH_RINGS = [
    [
        [-33.918757526645976, 26.49222375015387],
        [-40.26040404499685, 26.49222375015387],
        [-40.26040404499685, 20.974775529401086],
        [-33.918757526645976, 20.974775529401086],
        [-33.918757526645976, 26.49222375015387],
    ],
    [
        [-41.00986944735257, 20.362429056777785],
        [-47.34052480491607, 20.362429056777785],
        [-47.34052480491607, 14.877329446085028],
        [-41.00986944735257, 14.877329446085028],
        [-41.00986944735257, 20.362429056777785],
    ],
    [
        [-44.52875162673706, 24.15410327553623],
        [-44.52875162673706, 21.604066258136825],
        [-41.448118490651126, 21.604066258136825],
        [-41.448118490651126, 24.15410327553623],
        [-44.52875162673706, 24.15410327553623],
    ],
]

PATCHES_JSON = {
    "type": "FeatureCollection",
    "features": [_polygon_feature(ring) for ring in _PATCH_RINGS],
}
# Wrap the GeoJSON dict in an Earth Engine FeatureCollection (one feature per region).
PATCHES = ee.FeatureCollection(PATCHES_JSON)

In [66]:
# Interactive preview: THEMIS imagery, the fault-line pixels and the sampling regions.
Map = folium.Map()

# (Earth Engine object, visualisation parameters, layer label), in drawing order.
_map_layers = [
    (themis_tempe_terra, {}, "Mars THEMIS"),
    # Keep only the pixels equal to 1 so that non-fault pixels stay transparent.
    (faultlines_raster.updateMask(faultlines_raster.eq(1)), {"min": 0, "max": 1}, "Fault Lines"),
    (PATCHES, {}, "Regions"),
]
for _ee_object, _vis_params, _layer_name in _map_layers:
    Map.add_ee_layer(_ee_object, _vis_params, _layer_name)

# Add the layer toggle control; the return value is not needed.
_ = folium.LayerControl().add_to(Map)

# Last expression of the cell: render the map.
Map

In [67]:
# Prepare (without starting) an export of the neighbourhood-array stack to an Earth Engine asset.
_asset_export_options = {
    "description": "Image_Stack",
    "assetId": "projects/esg-satelite/assets/mars/stack",
    # region=GEOMETRY_WORLD,
    "scale": 500,
    "maxPixels": 1e11,
    # TODO: There is a crs parameter that can be assigned here?
}
task = ee.batch.Export.image.toAsset(image_stack_neighbours, **_asset_export_options)

In [68]:
#task.start()

In [None]:
#image_stack_neighbours = ee.Image("projects/esg-satelite/assets/mars/stack")

In [69]:
# Sample patches from each region and queue one TFRecord export task per region.
# The tasks are only created here; a later cell starts them.

# Sampling settings, named once so the export path below cannot drift from them.
SAMPLE_SCALE = 500    # nominal scale passed to sample(); also part of the output folder name
NUM_PIXELS = 1000     # requested number of sampled pixels per region
SAMPLE_SEED = 123     # fixed seed so the sampling is repeatable

# Split name for each patch, matched to the features of PATCHES by position.
names = ["train", "val", "test"]

# Fetch the number of patches once (one client/server round trip instead of two).
n_patches = PATCHES.size().getInfo()
if n_patches > len(names):
    # Fail before any task is created rather than with an IndexError part-way through.
    raise ValueError(
        f"PATCHES has {n_patches} regions but only {len(names)} split names are defined: {names}"
    )

patch_list = PATCHES.toList(n_patches)
task_list = []

# Output folder derived from the actual scale and kernel size, so that changing
# KERNEL_SIZE (or SAMPLE_SCALE) does not write differently-shaped data to the old path.
# With the current values this is "mars/modelling/themis_tempeterra_500_64".
export_folder = f"mars/modelling/themis_tempeterra_{SAMPLE_SCALE}_{KERNEL_SIZE}"

for geometry_index in range(n_patches):
    split_name = names[geometry_index]

    # Extract the feature from the collection and sample within its geometry.
    image_stack_sample = image_stack_neighbours.sample(
        region=ee.Feature(patch_list.get(geometry_index)).geometry(),
        scale=SAMPLE_SCALE,
        numPixels=NUM_PIXELS,
        seed=SAMPLE_SEED,
        tileScale=4
    )

    desc = f"Features and Labels for region: {split_name}"
    task = ee.batch.Export.table.toCloudStorage(
        collection=image_stack_sample,
        description=desc,
        bucket="esg-satelite-data-warehouse",
        fileNamePrefix=f"{export_folder}/modelling_data_{split_name}",
        fileFormat = "TFRecord"
    )
    task_list.append(task)

In [70]:
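# Check the number of samples received for the last region sampled above.
# It should be roughly numPixels (1000) or fewer — TODO confirm the expected count.
#image_stack_sample.size().getInfo()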

In [71]:
# Submit every queued export task to Earth Engine.
for task in task_list:
    task.start()

In [72]:
# Inspect the status of the first export task (the "train" region).
task_list[0].status()

{'state': 'READY',
 'description': 'Features and Labels for region: train',
 'creation_timestamp_ms': 1694174562143,
 'update_timestamp_ms': 1694174562143,
 'start_timestamp_ms': 0,
 'task_type': 'EXPORT_FEATURES',
 'id': 'CSU35IGQUF5TJ7M6VGVNBO5G',
 'name': 'projects/earthengine-legacy/operations/CSU35IGQUF5TJ7M6VGVNBO5G'}