<a href="https://colab.research.google.com/github/jpatrickbouchet/land-cover-on-demand/blob/master/2_Generate_training_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install geopandas, the Earth Engine Python API and a pinned TensorFlow 2.0 beta.
# NOTE(review): geopandas and earthengine-api are not version-pinned, so a fresh
# run may pull releases newer than the ones this notebook was written against.
!pip install geopandas
!pip install earthengine-api
!pip install tensorflow==2.0.0-beta1

In [0]:
# Run the Earth Engine command-line authentication command.
# NOTE(review): presumably this has to succeed before `ee.Initialize()` is
# called in the next cell — confirm.
!earthengine authenticate

In [0]:
import os

import json
import geopandas as gpd

# Import the Earth Engine API and initialize it.
import ee
ee.Initialize()

import folium
import tensorflow as tf

In [0]:
# URL template for map tiles generated by Earth Engine.
# `{mapid}` and `{token}` are filled in with str.format(); the doubled braces
# survive formatting as literal {z}/{x}/{y} placeholders for the tile layer.
EE_TILES = (
    'https://earthengine.googleapis.com/map/{mapid}'
    '/{{z}}/{{x}}/{{y}}'
    '?token={token}'
)

In [0]:
# Authenticate the current Colab user via google.colab.auth.
# NOTE(review): presumably required for access to the Cloud Storage bucket used
# by the export further down — confirm.
from google.colab import auth
auth.authenticate_user()

## Setting dataset framework

Note: To keep things simple for now, I will only work with the RGB bands of Sentinel-2 (i.e. the B4, B3 and B2 bands).

In [0]:
# Sentinel-2 bands kept for the dataset (red, green, blue).
BANDS = ['B4', 'B3', 'B2']

def maskS2clouds(image):
    """Mask cloud and cirrus pixels of an image, then keep and rescale BANDS.

    A pixel is kept only when both bit 10 (cloud) and bit 11 (cirrus) of the
    'QA60' band are zero.

    Args:
        image: Image exposing a 'QA60' band and the bands listed in BANDS.

    Returns:
        The masked image restricted to BANDS, with values divided by 10000.
    """
    cloud_bit, cirrus_bit = 10, 11
    qa_band = image.select('QA60')

    # Each test is true where the corresponding QA60 flag bit is not set.
    no_cloud = qa_band.bitwiseAnd(1 << cloud_bit).eq(0)
    no_cirrus = qa_band.bitwiseAnd(1 << cirrus_bit).eq(0)
    clear_sky = no_cloud.And(no_cirrus)

    masked = image.updateMask(clear_sky)
    return masked.select(BANDS).divide(10000)

# Build a cloud-masked median composite from Sentinel-2 TOA reflectance data:
#   1. keep the scenes acquired during 2017,
#   2. keep only scenes whose CLOUDY_PIXEL_PERCENTAGE is below 20,
#   3. mask the remaining cloud/cirrus pixels with maskS2clouds,
#   4. take the per-pixel median of what is left.
# `filterDate` treats its end date as exclusive, so the range ends on
# 2018-01-01 in order to include scenes acquired on 2017-12-31.
sentinel2 = (
    ee.ImageCollection('COPERNICUS/S2')
    .filterDate('2017-01-01', '2018-01-01')
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
    .map(maskS2clouds)
    .median()
)

sentinel2.getInfo()

{'bands': [{'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'max': 6.553500175476074,
    'min': 0.0,
    'precision': 'float',
    'type': 'PixelType'},
   'id': 'B4'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'max': 6.553500175476074,
    'min': 0.0,
    'precision': 'float',
    'type': 'PixelType'},
   'id': 'B3'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'max': 6.553500175476074,
    'min': 0.0,
    'precision': 'float',
    'type': 'PixelType'},
   'id': 'B2'}],
 'type': 'Image'}

In [0]:
# Linearly rescale the composite so that the input range [0, 0.3] maps to [0, 1].
# NOTE(review): this cell reassigns `sentinel2` from itself, so running it a
# second time without re-running the cell above rescales the image again.
scale_low, scale_high = 0, 0.3
sentinel2 = sentinel2.unitScale(scale_low, scale_high)
sentinel2.getInfo()

{'bands': [{'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B4'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B3'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B2'}],
 'type': 'Image'}

In [0]:
# Land cover data, previously uploaded to Google Earth Engine as an asset,
# loaded as a FeatureCollection.
landcover_asset_id = 'users/jpbouchet/land_cover_training'
landcover = ee.FeatureCollection(landcover_asset_id)

In [0]:
# Load the GeoJSON file describing the regions exported for training and validation.

training_area_path = "train_valid_area.geojson"

# Read the raw file content through tf.io.gfile, then parse it as JSON.
with tf.io.gfile.GFile(training_area_path, 'rb') as area_file:
    training_area = json.loads(area_file.read())

training_area

{'features': [{'geometry': {'coordinates': [[[1.932220458984375,
       47.72177305487703],
      [2.63671875, 47.72177305487703],
      [2.63671875, 48.11018210246057],
      [1.932220458984375, 48.11018210246057],
      [1.932220458984375, 47.72177305487703]]],
    'type': 'Polygon'},
   'properties': {'split': 'all_aoi'},
   'type': 'Feature'},
  {'geometry': {'coordinates': [[[1.932220458984375, 47.80502669216753],
      [2.63671875, 47.80502669216753],
      [2.63671875, 48.11018210246057],
      [1.932220458984375, 48.11018210246057],
      [1.932220458984375, 47.80502669216753]]],
    'type': 'Polygon'},
   'properties': {'split': 'train'},
   'type': 'Feature'},
  {'geometry': {'coordinates': [[[1.932220458984375, 47.72177305487703],
      [2.63671875, 47.72177305487703],
      [2.63671875, 47.805],
      [1.932220458984375, 47.805],
      [1.932220458984375, 47.72177305487703]]],
    'type': 'Polygon'},
   'properties': {'split': 'valid'},
   'type': 'Feature'}],
 'type': 'Fea

In [0]:
def _geometry_for_split(feature_collection, split):
    """Return the GeoJSON geometry of the single feature tagged with ``split``.

    Features are matched on their ``properties['split']`` value instead of
    their position in the file, so reordering the GeoJSON cannot silently swap
    the training and validation regions.

    Args:
        feature_collection: Parsed GeoJSON FeatureCollection (a dict with a
            'features' list).
        split: Value of the 'split' property to look for, e.g. 'train'.

    Returns:
        The 'geometry' dict of the matching feature.

    Raises:
        ValueError: If no feature, or more than one feature, carries ``split``.
    """
    matches = [
        feature['geometry']
        for feature in feature_collection['features']
        if (feature.get('properties') or {}).get('split') == split
    ]
    if len(matches) != 1:
        raise ValueError(
            'Expected exactly one feature with split={!r}, found {}'.format(
                split, len(matches)))
    return matches[0]


train_region = ee.Geometry(_geometry_for_split(training_area, 'train'))
valid_region = ee.Geometry(_geometry_for_split(training_area, 'valid'))

## Visualise in folium

In [0]:
# NOTE(review): this bare map is superseded by the `map1` created in the next
# cell, which is built with an explicit start location before layers are added.
map1 = folium.Map()

In [0]:
# Request Earth Engine map ids for the Sentinel-2 composite and the land cover data.
s2_mapid = sentinel2.getMapId({'bands': BANDS, 'min': 0, 'max': 1})
lc_mapid = landcover.getMapId()

map1 = folium.Map(location=[47.744374, 1.862480])

# Layers are listed in the order they are added to the map; the layer control
# comes last, after every layer it should list has been added.
map_layers = [
    folium.TileLayer(
        tiles=EE_TILES.format(**s2_mapid),
        attr='Google Earth Engine',
        overlay=True,
        name='sentinel2 median composite',
    ),
    folium.GeoJson(train_region.toGeoJSON(), name='train region'),
    folium.GeoJson(valid_region.toGeoJSON(), name='valid region'),
    folium.TileLayer(
        tiles=EE_TILES.format(**lc_mapid),
        attr='Google Earth Engine',
        overlay=True,
        name='corine land cover',
    ),
    folium.LayerControl(),
]
for map_layer in map_layers:
    map_layer.add_to(map1)

map1

## Export images

In [0]:
# Turn the land cover FeatureCollection into a single-band image:
# the LABEL_ID property is reduced with the mode reducer and the resulting
# band is named 'landcover'.
landcover_img = landcover.reduceToImage(
    properties=['LABEL_ID'],
    reducer=ee.Reducer.mode(),
).rename(['landcover'])

landcover_img.getInfo()

{'bands': [{'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'max': 9223372036854775807,
    'min': -9223372036854775808,
    'precision': 'int',
    'type': 'PixelType'},
   'id': 'landcover'}],
 'type': 'Image'}

In [0]:
# Append the 'landcover' label band to the Sentinel-2 composite so that the
# imagery bands and the labels are exported together in the same TFRecord files.
# The metadata printed below lists the bands of the combined image.
final_image = sentinel2.addBands(landcover_img)
final_image.getInfo()

{'bands': [{'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B4'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B3'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'id': 'B2'},
  {'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
   'data_type': {'max': 9223372036854775807,
    'min': -9223372036854775808,
    'precision': 'int',
    'type': 'PixelType'},
   'id': 'landcover'}],
 'type': 'Image'}

In [0]:
# Creating export tasks

# Name of the Cloud Storage bucket receiving the exported files;
# replace the placeholder before running.
outputBucket = '[OUTPUT_BUCKET_NAME_HERE]'

# TFRecord format options shared by both exports: 256x256 patches,
# a maximum file size of 104857600 bytes (100 MiB), and compression enabled.
ExportFormatOptions = {
    'patchDimensions': [256, 256],
    'maxFileSize': 104857600,
    'compressed': True
}


def _make_image_export_task(split, region):
    """Create (without starting) a TFRecord export of ``final_image``.

    Both splits share every export setting except their name and region, so
    they are built by this single helper to keep them from drifting apart.

    Args:
        split: Name of the dataset split ('train' or 'valid'); used in the task
            description and in the output file name prefix.
        region: ee.Geometry delimiting the area to export.

    Returns:
        The Earth Engine export task; call ``.start()`` on it to submit it.
    """
    return ee.batch.Export.image.toCloudStorage(
        image=final_image,
        description='{} images export'.format(split),
        fileNamePrefix='{}_images_'.format(split),
        bucket=outputBucket,
        scale=10,
        crs='EPSG:32631',
        fileFormat='TFRecord',
        region=region.toGeoJSON()['coordinates'],
        formatOptions=ExportFormatOptions
    )


trainImageTask = _make_image_export_task('train', train_region)
validImageTask = _make_image_export_task('valid', valid_region)

In [0]:
# Submit both export tasks.
# (The export itself happens directly on the Google Earth Engine platform.)

for export_task in (trainImageTask, validImageTask):
    export_task.start()

In [0]:
# Checking progress of export task for training:
# poll until Earth Engine no longer reports the task as active, then report the
# state it actually ended in. A task also stops being active when it fails or
# is cancelled, so "no longer active" alone does not mean the export succeeded.

import time

poll_interval_seconds = 5

print('Export train images (id: {}):'.format(trainImageTask.id))
while trainImageTask.active():
    print('In progress...')
    time.sleep(poll_interval_seconds)

train_export_status = trainImageTask.status()
if train_export_status.get('state') == 'COMPLETED':
    print('Done with train image export.')
else:
    print('Train image export ended in state {}: {}'.format(
        train_export_status.get('state'),
        train_export_status.get('error_message', 'no error message reported')))

Export train images (id: QFCW5Q3WSJODNAYGNYQ32JEI):
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
In progres...
Done with train image export.
