# Exporting Data from Google Earth Engine as TensorFlow Records

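This notebook rasterises Mars fault-line vectors, stacks them with a THEMIS image, samples fixed-size neighbourhood patches from train/val/test regions, and exports the samples to Google Cloud Storage as TFRecord files.

This notebook is still a **work in progress**.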

# 1. Config

In [1]:
import ee
import folium
import geopandas as gpd
import tensorflow as tf

2023-09-08 13:27:14.506255: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-08 13:27:15.313720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-09-08 13:27:15.313822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/loca

In [2]:
# Helper for drawing Earth Engine objects on a Folium map.
# The first parameter is `self` so the function can be attached to folium.Map as a method.
def add_ee_layer(self, ee_image_object, vis_params, name):
    """Overlay an Earth Engine object on this map as a toggleable tile layer.

    Args:
        self: The Folium object the tile layer is added to.
        ee_image_object: Earth Engine object exposing ``getMapId``.
        vis_params: Visualisation parameters forwarded to ``getMapId``.
        name: Label given to the layer.
    """
    tile_url = ee_image_object.getMapId(vis_params)['tile_fetcher'].url_format
    attribution = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'
    layer = folium.raster_layers.TileLayer(
        tiles=tile_url,
        attr=attribution,
        name=name,
        overlay=True,
        control=True,
    )
    layer.add_to(self)

# Attach the helper to folium.Map so it can be called as a method: Map.add_ee_layer(...)
folium.Map.add_ee_layer = add_ee_layer

In [3]:
# Uncomment and run this if the environment has not yet been authenticated with Earth Engine.
# ee.Authenticate()

In [4]:
# Initialise the Earth Engine client before any ee.* objects are built in the cells below.
ee.Initialize()

In [5]:
# Names of the exported properties: the input band(s) plus the fault-line label band.
BANDS = ["b1"]
RESPONSE = "FAULTLINE"
FEATURES = [*BANDS, RESPONSE]

# Side length, in pixels, of the square neighbourhood stored for every sample.
KERNEL_SIZE = 128

# TFRecord parsing schema: every feature is a KERNEL_SIZE x KERNEL_SIZE float32 tensor.
FEATURES_DICT = {
    feature_name: tf.io.FixedLenFeature(shape=[KERNEL_SIZE, KERNEL_SIZE], dtype=tf.float32)
    for feature_name in FEATURES
}
# The same schema entries as a list, in FEATURES order.
COLUMNS = list(FEATURES_DICT.values())

# 2. Data

In [6]:
# Earth Engine asset IDs of the two inputs
THEMIS_ASSET_ID = "projects/esg-satelite/assets/mars/features/themis_epsg3857_sample/pre/themis_epsg3857_sample"
FAULTS_ASSET_ID = "projects/esg-satelite/assets/mars/labels/faults/pre/faults"

# THEMIS image (model input) and fault-line vectors (source of the labels)
themis_tempe_terra = ee.Image(THEMIS_ASSET_ID)
faultline_vectors = ee.FeatureCollection(FAULTS_ASSET_ID)

In [7]:
# Add a column of ones.
# Every geometry (each fault vector) is tagged with the value 1, so that once the
# vectors are rasterised the fault pixels carry 1; all other pixels carry nothing
# until they are filled in later.
def add_column(feature):
    """Return `feature` with its "Value" property set to 1."""
    ones_property = {"Value": 1}
    return feature.set(ones_property)

# Tag every fault vector with Value = 1
faultline_vectors_with_ones = faultline_vectors.map(add_column)

# Step 1: convert the vectors to an image by burning in the "Value" property
faultlines_burned = faultline_vectors_with_ones.reduceToImage(
    properties=["Value"],
    reducer=ee.Reducer.first(),
)
# Step 2: rename the band "first" to "FAULTLINE" (the same string as RESPONSE in the config cell)
faultlines_named = faultlines_burned.select(["first"], ["FAULTLINE"])
# Step 3: unmask, so every non-fault pixel gets the value 0
faultlines_raster = faultlines_named.unmask(0)

In [8]:
# Run Canny edge detection on the THEMIS image to obtain a candidate extra layer
themis_edges = ee.Algorithms.CannyEdgeDetector(
    image=themis_tempe_terra,
    threshold=200,
    sigma=1,
)
# Rename the band "b1" to "CANNY"
canny = themis_edges.select(["b1"], ["CANNY"])

In [9]:
# Combine the label raster and the THEMIS image into one multi-band image.
# The `canny` layer is deliberately left out of the list for now.
stack_layers = [
    faultlines_raster,
    themis_tempe_terra,
]
image_stack = ee.Image.cat(stack_layers)

In [10]:
# Cast every band of the stack to float so the label and input bands share one type;
# this lines up with the tf.float32 dtype declared for every feature in FEATURES_DICT.
image_stack = image_stack.float()

In [11]:
# Build a KERNEL_SIZE x KERNEL_SIZE kernel (128 x 128 with the current config) filled with 1s.
# For each pixel we want to gather the KERNEL_SIZE x KERNEL_SIZE window of pixels around it.
# The weights are all 1 because no weighting is intended; presumably only the
# kernel's shape matters to neighborhoodToArray — TODO confirm against the API docs.
ee_list = ee.List.repeat(1, KERNEL_SIZE)
ee_lists = ee.List.repeat(ee_list, KERNEL_SIZE)
kernel = ee.Kernel.fixed(
    width=KERNEL_SIZE,
    height=KERNEL_SIZE,
    weights=ee_lists,
)

# Turn every band into an array band holding each pixel's neighbourhood
image_stack_neighbours = image_stack.neighborhoodToArray(kernel)

In [12]:
# GeoJSON describing the "patches": the regions (geometries) that the different
# data sets are sampled from. The order matters: the export cell pairs the
# features by position with the names "train", "val" and "test".
def _patch_feature(ring):
    """Wrap one closed ring of [x, y] vertices as a GeoJSON polygon Feature."""
    return {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "coordinates": [ring],
            "type": "Polygon",
        },
    }


# First feature -> "train"
_TRAIN_RING = [
    [-33.918757526645976, 26.49222375015387],
    [-40.26040404499685, 26.49222375015387],
    [-40.26040404499685, 20.974775529401086],
    [-33.918757526645976, 20.974775529401086],
    [-33.918757526645976, 26.49222375015387],
]

# Second feature -> "val"
_VAL_RING = [
    [-41.00986944735257, 20.362429056777785],
    [-47.34052480491607, 20.362429056777785],
    [-47.34052480491607, 14.877329446085028],
    [-41.00986944735257, 14.877329446085028],
    [-41.00986944735257, 20.362429056777785],
]

# Third feature -> "test"
_TEST_RING = [
    [-44.52875162673706, 24.15410327553623],
    [-44.52875162673706, 21.604066258136825],
    [-41.448118490651126, 21.604066258136825],
    [-41.448118490651126, 24.15410327553623],
    [-44.52875162673706, 24.15410327553623],
]

PATCHES_JSON = {
    "type": "FeatureCollection",
    "features": [
        _patch_feature(ring) for ring in (_TRAIN_RING, _VAL_RING, _TEST_RING)
    ],
}
# Wrap the GeoJSON as an Earth Engine FeatureCollection (one feature per region)
PATCHES = ee.FeatureCollection(PATCHES_JSON)

In [13]:
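# Optional visual check: uncomment this cell to preview the THEMIS image,
# the fault-line raster and the sampling regions on an interactive map.
# Map = folium.Map()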

# Map.add_ee_layer(
#     themis_tempe_terra,
#     {},
#     "Mars THEMIS"
# )

# Map.add_ee_layer(
#     faultlines_raster.updateMask(faultlines_raster.eq(1)),
#     {"min": 0, "max": 1},
#     "Fault Lines"
# )

# Map.add_ee_layer(
#     PATCHES,
#     {},
#     "Regions"
# )

# _ = folium.LayerControl().add_to(Map)

# Map

In [14]:
# Sampling / export settings.
# Each region is sampled in `number_of_shards` separate draws of
# `sample_size // number_of_shards` pixels each (a different seed per draw),
# so roughly `sample_size` patches are requested per region in total.
patch_list = PATCHES.toList(PATCHES.size())

task_list = []
number_of_shards = 100
sample_size = 1000
names = ["train", "val", "test"]

# numPixels is a pixel count, so use integer division (plain `/` would pass 10.0).
pixels_per_shard = sample_size // number_of_shards

# Regions are paired with `names` by position; fail with a clear message up front
# instead of an IndexError halfway through if there are more regions than names.
patch_count = PATCHES.size().getInfo()
if patch_count > len(names):
    raise ValueError(
        f"PATCHES contains {patch_count} regions but only {len(names)} names are defined: {names}"
    )

for geometry_index in range(patch_count):
    split_name = names[geometry_index]

    # Extract the feature from the collection and take its geometry.
    # This does not depend on the shard, so build it once per region.
    region = ee.Feature(patch_list.get(geometry_index)).geometry()

    # One small random sample per shard; the shard index doubles as the seed.
    shard_samples = [
        image_stack_neighbours.sample(
            region=region,
            scale=500,
            numPixels=pixels_per_shard,
            seed=shard_seed,
        )
        for shard_seed in range(number_of_shards)
    ]

    # Combine the shards with a single flatten() rather than a long chain of
    # merge() calls, which Earth Engine advises against for many collections.
    image_stack_sample = ee.FeatureCollection(shard_samples).flatten()

    desc = f"Features and Labels for region: {split_name}"
    # The task is only created here; it is started in a later cell.
    task = ee.batch.Export.table.toCloudStorage(
        collection=image_stack_sample,
        description=desc,
        bucket="esg-satelite-data-warehouse",
        fileNamePrefix=f"mars/modelling/themis_tempeterra_epsg3857_v1/modelling_data_{split_name}",
        fileFormat="TFRecord",
    )
    task_list.append(task)

In [15]:
# Submit every prepared export task (the previous cell only creates them).
for task in task_list:
    task.start()

In [17]:
# Check the state of the first export task (the "train" region)
task_list[0].status()

{'state': 'RUNNING',
 'description': 'Features and Labels for region: train',
 'creation_timestamp_ms': 1694179639538,
 'update_timestamp_ms': 1694179649402,
 'start_timestamp_ms': 1694179649367,
 'task_type': 'EXPORT_FEATURES',
 'attempt': 1,
 'id': 'PQAPWTQ2UCSQIXCA46XVGLI7',
 'name': 'projects/earthengine-legacy/operations/PQAPWTQ2UCSQIXCA46XVGLI7'}