# Exporting Data from Google Earth Engine as TensorFlow Records

This notebook is still a **work in progress**.

# 1. Config

In [None]:
import ee
import folium
import geopandas as gpd
import tensorflow as tf

In [None]:
# Initialise the Earth Engine client library; every ee.* call in the cells
# below depends on this having run first.
# NOTE(review): presumably credentials are already configured in this
# environment (e.g. via a prior ee.Authenticate()) — confirm.
ee.Initialize()

In [None]:
# Side length, in pixels, of the square patch stored for each feature.
KERNEL_SIZE = 64

# Input band names and the name of the label (response) band.
BANDS = ["HRSC"]
RESPONSE = "FAULTLINE"

# Every named feature stored in a record: the inputs followed by the response.
FEATURES = [*BANDS, RESPONSE]

# One parsing spec per feature: a KERNEL_SIZE x KERNEL_SIZE float32 tensor.
# A fresh spec object is created for each feature rather than sharing one.
COLUMNS = [
    tf.io.FixedLenFeature(
        shape=[KERNEL_SIZE, KERNEL_SIZE],
        dtype=tf.float32,
    )
    for _ in FEATURES
]

# Feature name -> parsing spec, pairing FEATURES and COLUMNS positionally.
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

# 2. Data

In [None]:
# Load the rasters from their Earth Engine assets.
# Both names are required by the stack cell below, so both must be defined
# here; leaving `hrsc_sample` commented out makes that cell fail with a
# NameError.
# NOTE(review): confirm the hrsc_sample asset exists at this path — the line
# was previously commented out.
faultlines_raster = ee.Image("projects/esg-satelite/assets/mars/labels/faultlines_raster")
hrsc_sample = ee.Image("projects/esg-satelite/assets/mars/features/post/hrsc_sample")

In [None]:
# Combine the fault-line raster and the HRSC raster into a single multi-band
# image, with the fault-line bands first.
# NOTE(review): FEATURES_DICT is keyed by "HRSC" and "FAULTLINE" — confirm the
# stacked bands carry those names, or rename them before exporting.
image_stack = ee.Image.cat([faultlines_raster, hrsc_sample])

In [None]:
# Cast every band to 32-bit float so all exported values share one type,
# matching the tf.float32 dtype declared for each entry in COLUMNS.
# NOTE(review): presumably this is why the cast is here — confirm.
image_stack = image_stack.float()

In [None]:
# Build a KERNEL_SIZE x KERNEL_SIZE fixed kernel whose weights are all 1.
# The intent is to capture, for each pixel, the full KERNEL_SIZE x KERNEL_SIZE
# window of pixels around it; weights of 1 mean no pixel in the window is
# transformed or down-weighted.
ee_list = ee.List.repeat(1, KERNEL_SIZE)  # one row of ones
ee_lists = ee.List.repeat(ee_list, KERNEL_SIZE)  # KERNEL_SIZE identical rows
kernel = ee.Kernel.fixed(
    width=KERNEL_SIZE,
    height=KERNEL_SIZE,
    weights=ee_lists,
)

In [None]:
# GeoJSON FeatureCollection of "patches": regions (geometries) that each
# delimit a different set of data. You might have separate train, validation
# and test patches, for example. At the moment there is a single rectangular
# patch, written as a closed ring (the last point repeats the first).
PATCHES_JSON = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "coordinates": [
                    [
                        [-75.69403257726853, 45.43314727092101],
                        [-75.69403257726853, 41.20493154723766],
                        [-69.91084889191063, 41.20493154723766],
                        [-69.91084889191063, 45.43314727092101],
                        [-75.69403257726853, 45.43314727092101],
                    ],
                ],
                "type": "Polygon",
            },
        },
    ],
}

# Wrap the GeoJSON patches in an Earth Engine FeatureCollection so that each
# patch's geometry can be used as a sampling region further down.
PATCHES = ee.FeatureCollection(PATCHES_JSON)

In [None]:
# TODO: Confirm that the kernel is actually applied before sampling.
# The kernel should be applied to create "image_stack_neighbours", which should
# hold a 64x64 tensor at every pixel. The sampling cell below should then take
# 1000 of those pixels at random (i.e. 1000 64x64 images).

In [None]:
# Sample patches from every region in PATCHES and prepare one TFRecord export
# task per region. The tasks are only created here, not started.

# Apply the kernel so that every pixel of `image_stack_neighbours` holds its
# KERNEL_SIZE x KERNEL_SIZE neighbourhood as an array. Without this step the
# name below is undefined and the kernel is never used.
image_stack_neighbours = image_stack.neighborhoodToArray(kernel)

# Fetch the number of regions once: getInfo() is a blocking call to the server.
patch_count = PATCHES.size().getInfo()
patch_list = PATCHES.toList(patch_count)
task_list = []

for geometry_index in range(patch_count):
    # Extract the feature from the collection and take its geometry as the
    # region to sample from.
    region = ee.Feature(patch_list.get(geometry_index)).geometry()

    # numPixels is set to 1000: the (approximate) number of pixels, and hence
    # patches, drawn from this region. The fixed seed keeps the draw repeatable.
    image_stack_sample = image_stack_neighbours.sample(
        region=region,
        scale=1000,
        numPixels=1000,
        seed=123,
    )

    # Export task descriptions may not contain spaces, so use underscores.
    desc = f"features_and_labels_region_{geometry_index}"
    task = ee.batch.Export.table.toCloudStorage(
        collection=image_stack_sample,
        description=desc,
        bucket="esg-satelite-data-warehouse",
        fileNamePrefix=f"mars/modelling/modelling_data_{geometry_index}",
        fileFormat="TFRecord",
    )
    task_list.append(task)

In [None]:
# Check the number of samples received for the last region processed by the
# sampling loop above (`image_stack_sample` is rebound on every iteration).
# It should be roughly numPixels (1000).
# NOTE(review): presumably the count can come in lower if some sampled pixels
# are dropped — confirm against the actual result.
image_stack_sample.size().getInfo()

In [None]:
# for task in task_list:
#     task.start()