# Set up environment

In [None]:
# Cloud authentication
# Signs the notebook user in before any of the cells below run.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime - confirm before running this notebook elsewhere.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Import, authenticate and initialize the Earth Engine library
# Every later cell builds its inputs through the `ee` module, so this cell
# has to run first.
# NOTE(review): recent earthengine-api releases may expect a Cloud project
# argument in ee.Initialize() - confirm against the installed version.
import ee
ee.Authenticate()
ee.Initialize()

In [None]:
# Import modules
# Each version is printed so the cell output records which library versions
# were used for the run. Neither module is used by the cells visible in this
# part of the notebook.
import tensorflow as tf
print(tf.__version__)

import folium
print(folium.__version__)

In [None]:
# Define variables depending on the type of classes

# Choose the labelling scheme here. The three dependent names below are
# looked up from this single switch, so they can never get out of sync the
# way individually (un)commented assignments can.
CLASS_TYPE = 'multi'  # one of: 'binary', 'multi', 'sub'

# Per-scheme settings:
#   (label asset suffix, training file prefix, validation file prefix)
_CLASS_SETTINGS = {
    'binary': ('/TrainBinLbl', 'binary_train', 'binary_val'),
    'multi': ('/TrainMultiLbl', 'multi_train', 'multi_val'),
    'sub': ('/TrainSubLbl', 'sub_train', 'sub_val'),
}

if CLASS_TYPE not in _CLASS_SETTINGS:
    raise ValueError(
        'Unknown CLASS_TYPE %r; expected one of %s'
        % (CLASS_TYPE, sorted(_CLASS_SETTINGS))
    )

# label_folder is appended to the asset base folder to locate the label
# image; train_base / val_base prefix the exported file names.
label_folder, train_base, val_base = _CLASS_SETTINGS[CLASS_TYPE]

# Input data

In [None]:
base_folder = 'projects/ee-shoyo7566/assets'


def _load_asset(asset_suffix, band_name):
    """Open the asset at ``base_folder + asset_suffix`` as an image and
    rename it to ``band_name``."""
    return ee.Image(base_folder + asset_suffix).rename(band_name)


# Input data (image, label and mask)
image = _load_asset('/CAPELLA', 'Capella')
label = _load_asset(label_folder, 'Label')
mask = _load_asset('/TrainMsk', 'Mask')

# Normalize the image
def normalize(image):
    """Min-max rescale every band of ``image`` using that band's own extrema.

    Each band is mapped to ``(value - min) / (max - min)``, where ``min`` and
    ``max`` are reduced over the image's own geometry.

    Args:
        image: the ``ee.Image`` to rescale.

    Returns:
        An ``ee.Image`` with the same bands, rescaled per band.
    """
    bandNames = image.bandNames()

    # A single pass with a combined min/max reducer replaces two separate
    # full-image reductions that used identical arguments.
    stats = image.reduceRegion(
        reducer=ee.Reducer.minMax(),
        geometry=image.geometry(),
        maxPixels=1e13,
        bestEffort=True,
        tileScale=16,
    )

    # With a multi-output reducer the result is keyed '<band>_min' and
    # '<band>_max'; build the keys in band order so the constant images line
    # up with the bands of `image`.
    minKeys = bandNames.map(lambda name: ee.String(name).cat('_min'))
    maxKeys = bandNames.map(lambda name: ee.String(name).cat('_max'))

    mins = ee.Image.constant(stats.values(minKeys))
    maxs = ee.Image.constant(stats.values(maxKeys))
    return image.subtract(mins).divide(maxs.subtract(mins))


image = normalize(image).float()

In [None]:
# Extent of the training/validation region
north = -2.157919248798915
south = -2.1782906099428896
east = 112.17029847816156
west = 112.14685391045177
xmid = (east + west) / 2

# Split the region
# y1..y6 are the interior row boundaries, placed at fixed fractions of the
# way from the northern edge to the southern edge.
y1, y2, y3, y4, y5, y6 = (
    fraction * (south - north) + north
    for fraction in (0.14, 0.28, 0.42, 0.56, 0.70, 0.84)
)

def _grid_cell(x_left, x_right, y_top, y_bottom):
    """Build one rectangular cell of the grid as an ``ee.Geometry.Polygon``.

    The corners are listed top-left, top-right, bottom-right, bottom-left.
    """
    return ee.Geometry.Polygon(
        [[[x_left, y_top],
          [x_right, y_top],
          [x_right, y_bottom],
          [x_left, y_bottom]]])


def _grid_row(y_top, y_bottom):
    """Return the (western, eastern) cells of the row between two latitudes."""
    return (_grid_cell(west, xmid, y_top, y_bottom),
            _grid_cell(xmid, east, y_top, y_bottom))


# Training cells: the six rows below y1, each split at xmid into a western
# (odd-numbered) and an eastern (even-numbered) polygon.
geometry, geometry2 = _grid_row(y1, y2)
geometry3, geometry4 = _grid_row(y2, y3)
geometry5, geometry6 = _grid_row(y3, y4)
geometry7, geometry8 = _grid_row(y4, y5)
geometry9, geometry10 = _grid_row(y5, y6)
geometry11, geometry12 = _grid_row(y6, south)

# Validation cells: the northernmost row (north to y1).
val1, val2 = _grid_row(north, y1)

# Synthesize the data from each polygon and create feature collections
trainingPolys = ee.FeatureCollection([
    ee.Feature(cell)
    for cell in (geometry, geometry2, geometry3, geometry4,
                 geometry5, geometry6, geometry7, geometry8,
                 geometry9, geometry10, geometry11, geometry12)
])

evalPolys = ee.FeatureCollection([ee.Feature(cell) for cell in (val1, val2)])

# Convert the feature collections to lists for iteration
trainingPolysList = trainingPolys.toList(trainingPolys.size())
evalPolysList = evalPolys.toList(evalPolys.size())

In [None]:
# Specify the size expected by the model
KERNEL_SIZE = 128

# Stack features
# The three inputs are concatenated into one image and every band is cast to
# float, so a single sample carries image, label and mask together.
featureStack = ee.Image.cat([
  image.select('Capella'),
  label.select('Label'),
  mask.select('Mask')
]).float()

# Build ONE KERNEL_SIZE x KERNEL_SIZE kernel whose weights are all 1, then
# use it to turn each pixel's neighborhood into an array.
lst = ee.List.repeat(1, KERNEL_SIZE)                        # one row of weights: [1, 1, ..., 1], length KERNEL_SIZE
lists = ee.List.repeat(lst, KERNEL_SIZE)                    # KERNEL_SIZE such rows: a KERNEL_SIZE x KERNEL_SIZE grid of ones
kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)   # a single kernel of width and height KERNEL_SIZE with those weights
arrays = featureStack.neighborhoodToArray(kernel)           # per band (Capella, Label, Mask), each pixel holds its kernel-sized neighborhood as an array

# Take samples and save them to a Google Drive folder

In [None]:
# Determine training and validation data size

# NOTE(review): the next three lines do not match the values set below (they
# assume 12 patches per polygon); presumably an earlier configuration -
# confirm or remove.
# Training data size = 12(patches per polygon) * 12(polygons) * 128 * 128 * 24 -> 54(pixels per patch) * 0.33 = 18685624
# Validation data size = 12(patches per polygon) * 2(polygons) * 128 * 128 * 18 -> 47(pixels per patch) * 0.33 = 2335703
# Test data size = 2344537

# Estimate matching the values set below:
# Training data size = 100(patches per polygon) * 12(polygons) * 120(pixels per patch) = 144000
# Validation data size = 100(patches per polygon) * 2(polygons) * 90(pixels per patch) = 18000
# Test data size = 2344537

Npatches = 100 # Number of sample() calls made for each polygon (one seed per call)
Npixels = 120 # numPixels passed to each sample() call on a training polygon
Npixels_val = 90 # numPixels passed to each sample() call on a validation polygon

In [None]:
def _export_samples(polys_list, poly_count, pixels_per_shard, base_name):
    """Sample every polygon and start one TFRecord export task per polygon.

    For each polygon, ``arrays`` is sampled ``Npatches`` times (seed 0 to
    ``Npatches - 1``) and the shards are combined into one collection, which
    is exported to the 'Thesis2' Drive folder as ``base_name + str(index)``.

    Args:
        polys_list: ``ee.List`` of polygon features to sample.
        poly_count: number of entries in ``polys_list`` (a Python int).
        pixels_per_shard: ``numPixels`` passed to each ``sample()`` call.
        base_name: prefix of the task description and of the file name.

    Returns:
        The list of started export tasks, in polygon order.
    """
    tasks = []
    for g in range(poly_count):
        region = ee.Feature(polys_list.get(g)).geometry()
        shards = [
            arrays.sample(
                numPixels=pixels_per_shard,
                region=region,
                seed=i,
                tileScale=16,
            )
            for i in range(Npatches)
        ]
        # Flatten a collection of collections instead of chaining merge():
        # repeated merge() calls produce ever longer element IDs and get
        # slower with every additional shard.
        geomSample = ee.FeatureCollection(shards).flatten()

        desc = base_name + str(g)
        task = ee.batch.Export.table.toDrive(
            collection=geomSample,
            description=desc,
            folder='Thesis2',
            fileNamePrefix=desc,
            fileFormat='TFRecord',
            selectors=['Capella', 'Label', 'Mask'],
        )
        task.start()
        tasks.append(task)
    return tasks


# Export all the training data in many pieces, with one task per geometry
train_tasks = _export_samples(
    trainingPolysList, trainingPolys.size().getInfo(), Npixels, train_base)

# Export all the validation data in many pieces, with one task per geometry
val_tasks = _export_samples(
    evalPolysList, evalPolys.size().getInfo(), Npixels_val, val_base)