In [1]:
import ee
import google.auth
import math
import numpy as np
import tensorflow as tf
import folium

2023-03-21 22:08:31.480607: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Workaround so that ee.Authenticate() can successfully save its authorization token.
# WARNING: this swaps the factory `ssl` uses for default HTTPS contexts for the
# unverified one, so code relying on that default no longer verifies certificates.
# NOTE(review): prefer fixing the local CA certificates and removing this override.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [3]:
# Run the Earth Engine authentication flow and store the resulting token.
ee.Authenticate()


Successfully saved authorization token.


In [4]:
# Initialize the Earth Engine client library for this session.
ee.Initialize()

In [49]:
# Sentinel-2 bands used as model inputs for prediction.
BANDS = [
    'B1', 'B2', 'B3', 'B4', 'B5', 'B6',
    'B7', 'B8', 'B8A', 'B9', 'B11', 'B12',
]

# Country outline (an ee.FeatureCollection) used to clip the imagery.
COUNTRY_GEOMETRY = (
    ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
    .filter(ee.Filter.eq('country_na', 'Ukraine'))
)
# (lat, lon) pair used to center the folium map.
COUNTRY_LATLON = (50., 31)

# First image of the ESA WorldCover collection, clipped to the country outline.
LABEL_DATA = ee.ImageCollection("ESA/WorldCover/v100").first().clip(COUNTRY_GEOMETRY)


In [50]:
def mask_sentinel2_clouds(image: ee.Image) -> ee.Image:
    """Clip a Sentinel-2 image to the country and mask pixels flagged as cloudy.

    A pixel is kept only when bit 10 (cloud) and bit 11 (cirrus cloud) of the
    "QA60" band are both zero.

    Args:
        image: Image that has a "QA60" band.

    Returns:
        The image clipped to COUNTRY_GEOMETRY with the flagged pixels masked.
    """
    cloud_flag = 1 << 10
    cirrus_flag = 1 << 11
    flagged_bits = cloud_flag | cirrus_flag
    clear_pixels = image.select("QA60").bitwiseAnd(flagged_bits).eq(0)
    clipped = image.clip(COUNTRY_GEOMETRY)
    return clipped.updateMask(clear_pixels)
    
def get_input_image(year: int, default_value: float = 1000.0) -> ee.Image:
    """Build a cloud-masked median Sentinel-2 composite for one calendar year.

    Args:
        year: Calendar year to composite.
        default_value: Value written to pixels that are still masked after
            the median is taken.

    Returns:
        A float32 image holding the bands listed in BANDS.
    """
    # filterDate treats the end date as exclusive, so the range has to end on
    # January 1st of the following year for December 31st to be included.
    start_date = f"{int(year)}-01-01"
    end_date = f"{int(year) + 1}-01-01"
    return (
        ee.ImageCollection("COPERNICUS/S2_HARMONIZED")        # Sentinel-2 images
        .filterDate(start_date, end_date)                     # the whole calendar year
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))  # keep scenes below 20% cloud cover
        .map(mask_sentinel2_clouds)  # clip to the country and mask cloudy pixels
        .select(BANDS)          # keep only the bands listed in BANDS
        .median()               # per-pixel median of the unmasked values
        .unmask(default_value)  # default value for masked pixels
        .float()                # convert to float32
    )

In [51]:

# Display colors for the remapped "landcover" classes: entry i is the color of
# class value i, in the same order as LABEL_NAMES below.
igbp_palette = [
    '#419BDF',  # water
    '#397D49',  # trees
    '#88B053',  # grass
    '#7A87C6',  # flooded vegetation
    '#E49635',  # crops
    '#DFC35A',  # shrub and scrub
    '#C4281B',  # built-up areas
    '#A59B8F',  # bare ground
    '#B39FE1',  # snow and ice
]

# Class names, indexed by class value (same order as the palette above).
LABEL_NAMES = [
  'water', 'trees', 'grass', 'flooded_vegetation', 'crops', 'shrub_and_scrub',
  'built', 'bare', 'snow_and_ice'
]
# Base map centered on COUNTRY_LATLON; later cells add layers and markers to it.
# NOTE(review): the name `map` shadows the Python builtin `map` in this notebook.
map = folium.Map(location=COUNTRY_LATLON, zoom_start=13)
def get_label_image() -> ee.Image:
    """Return the label image with ESA classes remapped to Dynamic World classes.

    Returns:
        A single-band unsigned 8-bit image named "landcover"; pixels without
        a value are filled with 0 (water).
    """
    # ESA "Map" value -> Dynamic World class index (name taken from LABEL_NAMES).
    esa_to_dynamic_world = {
        10: 1,   # trees
        20: 5,   # shrub_and_scrub
        30: 2,   # grass
        40: 4,   # crops
        50: 6,   # built
        60: 7,   # bare
        70: 8,   # snow_and_ice
        80: 0,   # water
        90: 3,   # flooded_vegetation
        95: 3,   # flooded_vegetation
        100: 7,  # bare
    }
    esa_values = list(esa_to_dynamic_world.keys())
    dynamic_world_values = list(esa_to_dynamic_world.values())

    remapped = LABEL_DATA.select("Map").remap(esa_values, dynamic_world_values)
    # The 9 classes fit into an unsigned 8-bit integer.
    return remapped.rename("landcover").unmask(0).byte()

# Render the remapped labels with one palette color per class value.
image_labels = get_label_image()

vis_params = dict(
    bands=["landcover"],
    max=len(LABEL_NAMES) - 1,
    palette=igbp_palette,
)
mapid_labels = image_labels.getMapId(vis_params)

# Add the label tiles to the map as an overlay named 'esa'.
folium.TileLayer(
    name='esa',
    overlay=True,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    tiles=mapid_labels['tile_fetcher'].url_format,
).add_to(map)


<folium.raster_layers.TileLayer at 0x7fe36f8e0040>

In [52]:
map

In [53]:
# Sentinel-2 surface-reflectance scenes for 2020. filterDate's end date is
# exclusive, so the range ends on 2021-01-01 to include December 31st.
SURF_REF_SEN2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED").filterDate('2020-01-01', '2021-01-01')

In [54]:
def cloudmask_surfRefSen2(image):
    """Clip an image to the country and mask pixels flagged as cloud or cirrus.

    The previous body was a line-for-line copy of `mask_sentinel2_clouds`
    (same QA60 bits 10 and 11, same clip to COUNTRY_GEOMETRY). It now
    delegates to that function so the cloud-masking rule lives in one place.

    Args:
        image: ee.Image that has a "QA60" band.

    Returns:
        The clipped ee.Image with the flagged pixels masked.
    """
    return mask_sentinel2_clouds(image)

# Median composite built from the surface-reflectance collection.
# Note: ee.Image.select() also supports regular expressions.
# NOTE(review): this value is overwritten by the `image = get_input_image(2020)`
# assignment at the end of this cell, so it is never used by later cells.
image = SURF_REF_SEN2.map(cloudmask_surfRefSen2).select(BANDS).filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20)).median().unmask(1000.0).float()
# Optional rescaling, currently disabled:
#image = image.divide(10000) # we divide by 10000 bc the bands are scaled by 10000 (according to https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2)


# The composite that the rest of the notebook actually uses.
image = get_input_image(2020)

In [55]:
# Render bands B4, B3 and B2 of the composite, stretched between 0 and 3000.
vis_params = dict(
    min=0,
    max=3000,
    bands=["B4", "B3", "B2"],
)

mapid = image.getMapId(vis_params)

# Add the composite tiles to the map as an overlay named 'median composite'.
folium.TileLayer(
    name='median composite',
    overlay=True,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    tiles=mapid['tile_fetcher'].url_format,
).add_to(map)

<folium.raster_layers.TileLayer at 0x7fe3701160a0>

In [56]:
import geopandas as gpd

# Read the validation shapefile.
shapeFile_path = "../data/validation_data/merged_harvest_validation_20220919.shp"
shapeFile = gpd.read_file(shapeFile_path)
# Duplicate the coordinate columns under longer names.
shapeFile[['longitude', 'latitude']] =  shapeFile[['lon', 'lat']]

# Keep only rows where both coordinates are present, as an (N, 2) array of [lon, lat].
lon_lat_df = shapeFile[['lon', 'lat']].dropna()
lons_lats_np = np.array(lon_lat_df)
lons_lats_np

array([[25.45868435, 50.65504754],
       [36.2894705 , 50.29463283],
       [32.54848447, 50.8608493 ],
       ...,
       [35.05100326, 46.69351009],
       [33.66820389, 50.45192896],
       [30.63346126, 46.99444801]])

In [57]:
def createCoordinatesFC(lons_lats):
    """Build a feature collection of points from (lon, lat) coordinate pairs.

    Args:
        lons_lats (np.array): has shape (N, 2); each row is [lon, lat].

    Returns:
        ee.FeatureCollection: built from one ee.Geometry.Point per row, in row order.
    """
    return ee.FeatureCollection(
        [ee.Geometry.Point(lon, lat) for lon, lat in lons_lats]
    )


# The validation points from the shapefile as an Earth Engine feature collection.
points_fc = createCoordinatesFC(lons_lats_np)

In [58]:
# Sample the input bands together with the "landcover" label band
# (image_labels is an ee.Image too) at every validation point.
training_fc = (
    image
    .addBands(image_labels)
    .sampleRegions(collection=points_fc, scale=30)
)

In [59]:
%%script echo skipping
# comment out script above to run this cell

import time

# Export the sampled ee.FeatureCollection to Google Drive as a GeoJSON file.
task = ee.batch.Export.table.toDrive(
    collection=training_fc,
    description='training_data_overlayed_from_shape_file',
    fileFormat='GeoJSON',
)
task.start()

# Block for as long as the task reports itself as active.
while task.active():
    print('Polling for task (id: {}).'.format(task.id))
    time.sleep(5)

# The loop above also ends when the task fails or is cancelled, so check the
# final state instead of finishing silently without an exported file.
final_status = task.status()
if final_status.get('state') != 'COMPLETED':
    raise RuntimeError(
        'Export task {} ended in state {}: {}'.format(
            task.id, final_status.get('state'), final_status.get('error_message')
        )
    )
print('Export task {} completed.'.format(task.id))

skipping


In [60]:
# Load the sampled training table from disk and preview its first rows.
# NOTE(review): presumably this is the GeoJSON produced by the Drive export cell
# above (the file name matches that task's description) — confirm.
training_df = gpd.read_file("../data/training_data_overlayed_from_shape_file.geojson")
training_df.head()

Unnamed: 0,id,B1,B11,B12,B2,B3,B4,B5,B6,B7,B8,B8A,B9,landcover,geometry
0,0_0,1361.0,1839.0,985.5,1088.5,928.0,682.0,1037.5,2267.5,2811.0,2859.0,3090.0,1009.0,4,MULTIPOINT EMPTY
1,1_0,1252.0,1952.0,1216.5,981.5,949.5,717.5,1072.0,1602.5,1834.0,1902.0,2177.0,600.0,4,MULTIPOINT EMPTY
2,2_0,1383.0,2063.0,1313.5,1108.5,900.0,932.0,1050.0,1942.0,2300.0,2385.0,2777.0,750.0,4,MULTIPOINT EMPTY
3,3_0,1404.0,1826.0,1292.0,1144.0,945.0,867.0,1086.0,1709.0,1991.0,1966.0,2248.0,616.0,4,MULTIPOINT EMPTY
4,4_0,1291.0,1521.0,1153.5,1025.0,850.5,773.0,850.0,1372.5,1576.5,1623.5,1796.5,548.0,4,MULTIPOINT EMPTY


In [61]:
# Split the table into model inputs and targets. Selecting BANDS already
# leaves out the "id" and "geometry" columns, so no explicit drop is needed.
training_np = training_df[BANDS].to_numpy()
labels_np = training_df[['landcover']].to_numpy()

print(training_np.shape, labels_np.shape)

(558, 12) (558, 1)


In [62]:
from typing import Iterable

def sample_points(
    region: ee.Geometry, image: ee.Image, points_per_class: int, scale: int) -> Iterable[tuple[float, float]]:
    """Yield the coordinates of a stratified sample of points taken from `image`.

    Args:
        region: Region to sample within.
        image: Image that `stratifiedSample` is called on.
        points_per_class: Passed as the first argument of `stratifiedSample`.
        scale: Scale passed to `stratifiedSample`.

    Yields:
        The "coordinates" entry of each sampled feature's geometry.
    """
    sampled = image.stratifiedSample(
        points_per_class,
        region=region,
        scale=scale,
        geometries=True,
    )
    # getInfo() brings the whole sampled FeatureCollection to the client as a list of dicts.
    features = sampled.toList(sampled.size()).getInfo()
    yield from (feature["geometry"]["coordinates"] for feature in features)

# Alternative (disabled): mark a stratified sample of points instead.
# NOTE(review): `countryGeometry` is not defined in the cells shown here; the
# constant defined earlier is COUNTRY_GEOMETRY.
#for [lon, lat] in sample_points(countryGeometry, image_labels, points_per_class=100, scale=1000):
#    folium.Marker(location=[lat, lon], popup = str([lon, lat])).add_to(map)

# Mark every validation point. The array rows are [lon, lat] while the marker
# location is given as [lat, lon].
for [lon, lat] in lons_lats_np:
    folium.Marker(location=[lat, lon], popup = str([lon, lat])).add_to(map)

# Add the layer control to the map.
folium.LayerControl().add_to(map)

<folium.map.LayerControl at 0x7fe370373a60>

In [63]:
map

In [64]:
import io
import requests
from google.api_core import exceptions, retry

@retry.Retry(deadline=10 * 60)  # seconds
def get_patch(
    image: ee.Image,
    lonlat: tuple[float, float],
    patch_size: int,
    scale: int,
    timeout: float = 10 * 60,
) -> np.ndarray:
    """Download a square patch of `image` around a point as a NumPy array.

    Args:
        image: Image to download pixels from.
        lonlat: (longitude, latitude) of the patch center.
        patch_size: Width and height of the downloaded patch, in pixels.
        scale: Pixel size used to size the region: the point is buffered by
            scale * patch_size / 2 before taking its bounds
            (presumably meters per pixel — confirm).
        timeout: Seconds `requests` waits on the server before giving up, so
            a stalled download cannot hang the notebook indefinitely.

    Returns:
        The array loaded from the NPY payload of the download URL.

    Raises:
        google.api_core.exceptions.TooManyRequests: on HTTP 429, which the
            Retry decorator uses to retry the call.
        requests.HTTPError: for any other unsuccessful HTTP status.
        requests.Timeout: when the server does not answer within `timeout`.
    """
    point = ee.Geometry.Point(lonlat)
    url = image.getDownloadURL({
        "region": point.buffer(scale * patch_size / 2, 1).bounds(1),
        "dimensions": [patch_size, patch_size],
        "format": "NPY",
    })

    # If we get "429: Too Many Requests" errors, it's safe to retry the request.
    # The Retry library only works with `google.api_core` exceptions.
    response = requests.get(url, timeout=timeout)
    if response.status_code == 429:
        raise exceptions.TooManyRequests(response.text)

    # Still raise any other exceptions to make sure we got valid data.
    response.raise_for_status()
    # NOTE(review): allow_pickle=True lets np.load unpickle objects contained in
    # the downloaded bytes; confirm it is really required for this payload and
    # prefer allow_pickle=False if the array loads without it.
    return np.load(io.BytesIO(response.content), allow_pickle=True)

In [65]:
from numpy.lib.recfunctions import structured_to_unstructured

def get_input_patch(year: int, lonlat: tuple[float, float], patch_size: int) -> np.ndarray:
    """Download the input-band patch for `year` around `lonlat`.

    Args:
        year: Year passed to `get_input_image`.
        lonlat: (longitude, latitude) of the patch center.
        patch_size: Width and height of the patch, in pixels.

    Returns:
        The structured patch from `get_patch` (requested at scale=10),
        converted to an unstructured array.
    """
    structured_patch = get_patch(get_input_image(year), lonlat, patch_size, scale=10)
    return structured_to_unstructured(structured_patch)

def get_label_patch(lonlat: tuple[float, float], patch_size: int) -> np.ndarray:
    """Download the "landcover" label patch around `lonlat`.

    Args:
        lonlat: (longitude, latitude) of the patch center.
        patch_size: Width and height of the patch, in pixels.

    Returns:
        The structured patch from `get_patch` (requested at scale=10),
        converted to an unstructured array.
    """
    structured_patch = get_patch(get_label_image(), lonlat, patch_size, scale=10)
    return structured_to_unstructured(structured_patch)

In [67]:

def get_testing_data(
    lonlat: tuple[float, float], patch_size: int = 128, year: int = 2020
) -> tuple[np.ndarray, np.ndarray]:
    """Download matching input and label patches for a square around `lonlat`.

    The square is `patch_size` pixels on each side; both patches are requested
    at scale=10 by the helpers this function calls.

    Args:
        lonlat (tuple[float, float]): (longitude, latitude) of the square's center.
        patch_size (int, optional): Side length of the square, in pixels.
            Defaults to 128.
        year (int, optional): Year of the Sentinel-2 composite used for the
            inputs. Defaults to 2020.

    Returns:
        tuple[np.ndarray, np.ndarray]: (inputs, labels). With patch_size=512 the
        recorded run produced float32 inputs of shape (512, 512, 12) and uint8
        labels of shape (512, 512, 1).
    """
    return (
        get_input_patch(year, lonlat, patch_size),
        get_label_patch(lonlat, patch_size),
    )

# COUNTRY_LATLON is (lat, lon); the patch helpers take (lon, lat).
point = COUNTRY_LATLON[::-1]
testing_inputs, testing_labels = get_testing_data(point, 512)
print(f"inputs : {testing_inputs.dtype} {testing_inputs.shape}")
print(f"labels : {testing_labels.dtype} {testing_labels.shape}")

inputs : float32 (512, 512, 12)
labels : uint8 (512, 512, 1)


In [68]:
def serialize(inputs: np.ndarray, labels: np.ndarray) -> bytes:
    """Pack an inputs/labels pair into a serialized tf.train.Example.

    Each array is serialized with tf.io.serialize_tensor and stored as a
    single-element bytes feature under the key "inputs" or "labels".

    Args:
        inputs: Array stored under the "inputs" feature.
        labels: Array stored under the "labels" feature.

    Returns:
        The serialized tf.train.Example as bytes.
    """
    def as_bytes_feature(array: np.ndarray) -> tf.train.Feature:
        # One serialized tensor wrapped in a BytesList feature.
        tensor_bytes = tf.io.serialize_tensor(array).numpy()
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[tensor_bytes]))

    feature_map = {
        "inputs": as_bytes_feature(inputs),
        "labels": as_bytes_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_map))
    return example.SerializeToString()

# Serialize the downloaded test patch and report the payload size.
serialized = serialize(testing_inputs, testing_labels)
print(f"serialized: {len(serialized)} bytes")

serialized: 12845158 bytes


2023-03-21 22:49:42.732189: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
