In [6]:
import ee
import google.auth
import math

In [113]:
import numpy as np


In [7]:
# Workaround so that ee.Authenticate() can successfully save its authorization token.
# WARNING: this swaps the default HTTPS context factory for the *unverified* one, so
# code that relies on the default context no longer checks TLS certificates for the
# rest of this kernel session. NOTE(review): prefer fixing the local CA certificates
# and removing this cell.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [8]:
# Run the Earth Engine authentication flow; on success it reports that the
# authorization token was saved (see the cell output).
ee.Authenticate()



Successfully saved authorization token.


In [9]:
# Initialize the Earth Engine client; must run before any other ee.* call below.
ee.Initialize()

In [159]:
# Sentinel-2 bands used as prediction inputs; get_input_image() selects exactly these.
BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

def mask_sentinel2_clouds(image: ee.Image) -> ee.Image:
    """Hide the pixels that the ``QA60`` band flags as cloud or cirrus.

    Bits 10 (cloud) and 11 (cirrus cloud) of ``QA60`` are combined into a single
    bit mask; a pixel stays visible only when both bits are 0.

    Args:
        image: Image that carries a ``QA60`` band.

    Returns:
        ``image`` with its mask updated so that flagged pixels are hidden.
    """
    flagged_bits = sum(1 << bit for bit in (10, 11))  # cloud bit, cirrus bit
    is_clear = image.select("QA60").bitwiseAnd(flagged_bits).eq(0)
    return image.updateMask(is_clear)
    
def get_input_image(year: int, default_value: float = 1000.0) -> ee.Image:
    """Build a cloud-masked median Sentinel-2 composite for one calendar year.

    Args:
        year: Calendar year whose images are composited.
        default_value: Value written into pixels that are still masked after
            compositing. Defaults to 1000.0.

    Returns:
        A float32 image containing the bands listed in ``BANDS``.
    """
    return (
        ee.ImageCollection("COPERNICUS/S2_HARMONIZED")        # Sentinel-2 images
        # filterDate() treats the end date as exclusive, so the range ends on
        # Jan 1 of the following year; otherwise Dec 31 would be dropped.
        .filterDate(f"{year}-01-01", f"{year + 1}-01-01")
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))  # drop cloudy scenes
        .map(mask_sentinel2_clouds)  # mask/hide cloudy pixels
        .select(BANDS)          # keep only the bands listed in BANDS
        .median()               # median of all non-cloudy pixels
        .unmask(default_value)  # default value for masked pixels
        .float()                # convert to float32
    )

In [134]:
import tensorflow as tf
print(tf.__version__)

2.11.0


In [135]:
import folium
print(folium.__version__)

0.14.0


In [151]:
# Country boundaries, narrowed down to Ukraine.
countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
# NOTE: despite its name this is the filtered FeatureCollection, not an ee.Geometry.
countryGeometry = countries.filter(
    ee.Filter.eq("country_na", "Ukraine")
)
# (lat, lon) reference point; it is swapped to (lon, lat) where a patch is requested.
country_latLon = (50.0, 31)

In [181]:
# First image of the ESA WorldCover v100 collection, clipped to the country.
LABEL_DATA = ee.ImageCollection("ESA/WorldCover/v100").first().clip(countryGeometry)

# One colour per entry of LABEL_NAMES, in the same order.
igbp_palette = [
    "#419BDF",  # water
    "#397D49",  # trees
    "#88B053",  # grass
    "#7A87C6",  # flooded vegetation
    "#E49635",  # crops
    "#DFC35A",  # shrub and scrub
    "#C4281B",  # built-up areas
    "#A59B8F",  # bare ground
    "#B39FE1",  # snow and ice
]

# Class names; the list index is the class value produced by get_label_image().
LABEL_NAMES = [
    "water",
    "trees",
    "grass",
    "flooded_vegetation",
    "crops",
    "shrub_and_scrub",
    "built",
    "bare",
    "snow_and_ice",
]

# Base map that the tile layers and markers in later cells are added to.
# NOTE(review): the name `map` shadows the Python builtin; later cells rely on it.
map = folium.Map(location=[50.0, 31], zoom_start=13)
def get_label_image() -> ee.Image:
    """Return the label image with ESA classes remapped to Dynamic World classes.

    Reads the ``Map`` band of the module-level ``LABEL_DATA`` image, remaps the
    ESA class values, renames the band to ``landcover``, fills missing values
    with 0 (water) and casts to byte, since the 9 classifications fit into an
    unsigned 8-bit integer.

    Returns:
        A single-band ``landcover`` byte image.
    """
    # ESA classification value -> Dynamic World classification index
    esa_to_dynamic_world = {
        10: 1,
        20: 5,
        30: 2,
        40: 4,
        50: 6,
        60: 7,
        70: 8,
        80: 0,
        90: 3,
        95: 3,
        100: 7,
    }
    remapped = LABEL_DATA.select("Map").remap(
        list(esa_to_dynamic_world.keys()),
        list(esa_to_dynamic_world.values()),
    )
    return remapped.rename("landcover").unmask(0).byte()

image_labels = get_label_image()

# Render the landcover band with one palette colour per class index.
vis_params = dict(
    bands=["landcover"],
    max=len(LABEL_NAMES) - 1,
    palette=igbp_palette,
)
mapid_labels = image_labels.getMapId(vis_params)

# Add the rendered labels to the map as an overlay layer named 'esa'.
folium.TileLayer(
    name='esa',
    overlay=True,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    tiles=mapid_labels['tile_fetcher'].url_format,
).add_to(map)


<folium.raster_layers.TileLayer at 0x7fb05d9828b0>

In [182]:
# Display the folium map (the last expression of a cell is rendered).
map

In [183]:
# Sentinel-2 surface reflectance for all of 2020. filterDate() treats the end date
# as exclusive, so the range ends on 2021-01-01 to keep Dec 31, 2020 in the collection.
SURF_REF_SEN2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED").filterDate('2020-01-01', '2021-01-01')

In [184]:
def cloudmask_surfRefSen2(image):
    """Clip an image to the country and hide its cloudy pixels.

    Bits 10 (opaque clouds) and 11 (cirrus clouds) of the ``QA60`` band mark
    cloudy pixels; only pixels with both bits equal to 0 remain unmasked. The
    clip region is the module-level ``countryGeometry``.

    Args:
        image: Image that carries a ``QA60`` band.

    Returns:
        The clipped image with its mask updated.
    """
    cloud_bits = (1 << 10) | (1 << 11)  # opaque clouds | cirrus clouds
    clear_pixels = image.select('QA60').bitwiseAnd(cloud_bits).eq(0)
    clipped = image.clip(countryGeometry)
    return clipped.updateMask(clear_pixels)

# select() supports regular expressions. The pattern is a raw string so that the
# backslash in \d is not treated as an (invalid) Python escape sequence.
# NOTE(review): the pattern looks like it keeps only B1..B9 — confirm that dropping
# B8A/B11/B12 is intended.
image = (
    SURF_REF_SEN2
    # Filter on scene metadata before mapping so cloudy scenes are never processed.
    .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
    .map(cloudmask_surfRefSen2)
    .select(r"B{1}\d")
    .median()
    .unmask(1000.0)
    .float()
    # We divide by 10000 because the bands are scaled by 10000 (according to
    # https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2).
    # Doing it inside the chain keeps the cell idempotent when it is re-run.
    .divide(10000)
)


In [185]:
# Display bands B4, B3 and B2 of the composite, stretched between 0 and 0.3.
vis_params = dict(min=0, max=0.3, bands=["B4", "B3", "B2"])

mapid = image.getMapId(vis_params)

# Add the composite to the existing map as an overlay layer.
folium.TileLayer(
    name='median composite',
    overlay=True,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    tiles=mapid['tile_fetcher'].url_format,
).add_to(map)


<folium.raster_layers.TileLayer at 0x7fb05c9b57f0>

In [186]:
import geopandas as gpd

# Validation points shipped with the project.
shapefile_path = "../data/validation_data/merged_harvest_validation_20220919.shp"
gdf = gpd.read_file(shapefile_path)

# Keep the coordinate columns only and drop rows with a missing coordinate.
lon_lat_df = gdf[["lon", "lat"]].dropna()

# Array with one [lon, lat] row per validation point.
lon_lat_shapeFile = np.array(lon_lat_df)
lon_lat_shapeFile

array([[25.45868435, 50.65504754],
       [36.2894705 , 50.29463283],
       [32.54848447, 50.8608493 ],
       ...,
       [35.05100326, 46.69351009],
       [33.66820389, 50.45192896],
       [30.63346126, 46.99444801]])

In [187]:
from typing import Iterable

def sample_points(
    region: ee.Geometry,
    image: ee.Image,
    points_per_class: int,
    scale: int,
) -> Iterable[tuple[float, float]]:
    """Yield the coordinates of a stratified sample of points from ``image``.

    This is a generator: the sample is only requested once iteration starts.

    Args:
        region: Region passed to ``stratifiedSample``.
        image: Image whose ``stratifiedSample`` method draws the points.
        points_per_class: Number of points passed to ``stratifiedSample``.
        scale: Scale passed to ``stratifiedSample``.

    Yields:
        The ``["geometry"]["coordinates"]`` entry of every sampled feature.
    """
    # stratifiedSample() returns a FeatureCollection; geometries=True keeps each
    # point's geometry so that its coordinates can be read back.
    sampled = image.stratifiedSample(
        points_per_class,
        region=region,
        scale=scale,
        geometries=True,
    )
    features = sampled.toList(sampled.size()).getInfo()
    yield from (feature["geometry"]["coordinates"] for feature in features)

# Alternative marker source: a stratified sample drawn from the label image.
#for [lon, lat] in sample_points(countryGeometry, image_labels, points_per_class=100, scale=1000):
#    folium.Marker(location=[lat, lon], popup = str([lon, lat])).add_to(map)

# One marker per validation point; folium expects [lat, lon] for the location.
for lon, lat in lon_lat_shapeFile:
    folium.Marker(
        location=[lat, lon],
        popup=str([lon, lat]),
    ).add_to(map)

# Layer toggle for the overlays added in the cells above.
folium.LayerControl().add_to(map)
#map

<folium.map.LayerControl at 0x7fb059dc1160>

In [188]:
# uncomment below to see the points marked on the map
#map 

In [189]:
import io
import requests
from google.api_core import exceptions, retry

@retry.Retry(deadline=10 * 60)  # seconds
def get_patch(
    image: ee.Image,
    lonlat: tuple[float, float],
    patch_size: int,
    scale: int,
    timeout: float = 300.0,
) -> np.ndarray:
    """Download a square patch of ``image`` around a point as a NumPy array.

    The region is the bounding box of a buffer of ``scale * patch_size / 2``
    around the point, requested at ``patch_size`` x ``patch_size`` pixels in
    NPY format.

    Args:
        image: Image to download pixels from.
        lonlat: ``(longitude, latitude)`` of the patch centre.
        patch_size: Width and height of the patch, in pixels.
        scale: Multiplier for ``patch_size`` when sizing the buffer.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Defaults to 300. Without a timeout a stalled connection would block
            forever.

    Returns:
        The array loaded from the NPY response; callers convert it with
        ``structured_to_unstructured``.

    Raises:
        google.api_core.exceptions.TooManyRequests: On HTTP 429; the ``Retry``
            decorator retries these.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        requests.exceptions.HTTPError: For any other unsuccessful HTTP status.
    """
    point = ee.Geometry.Point(lonlat)
    url = image.getDownloadURL({
        "region": point.buffer(scale * patch_size / 2, 1).bounds(1),
        "dimensions": [patch_size, patch_size],
        "format": "NPY",
    })

    # If we get "429: Too Many Requests" errors, it's safe to retry the request.
    # The Retry library only works with `google.api_core` exceptions.
    response = requests.get(url, timeout=timeout)
    if response.status_code == 429:
        raise exceptions.TooManyRequests(response.text)

    # Still raise any other exceptions to make sure we got valid data.
    response.raise_for_status()
    # NOTE(review): allow_pickle=True lets np.load unpickle objects from the
    # downloaded payload, which can execute arbitrary code if the response is not
    # trustworthy. Confirm whether the NPY payload loads with allow_pickle=False.
    return np.load(io.BytesIO(response.content), allow_pickle=True)

In [190]:
from numpy.lib.recfunctions import structured_to_unstructured

def get_input_patch(year: int, lonlat: tuple[float, float], patch_size: int) -> np.ndarray:
    """Download the input patch around ``lonlat`` for the given year.

    Args:
        year: Year passed to ``get_input_image``.
        lonlat: ``(longitude, latitude)`` of the patch centre.
        patch_size: Width and height of the patch, in pixels.

    Returns:
        The patch as a plain (unstructured) array; in this notebook a
        512-pixel patch comes back as float32 with shape (512, 512, 12).
    """
    structured_patch = get_patch(get_input_image(year), lonlat, patch_size, scale=10)
    return structured_to_unstructured(structured_patch)

def get_label_patch(lonlat: tuple[float, float], patch_size: int) -> np.ndarray:
    """Download the label patch around ``lonlat``.

    Args:
        lonlat: ``(longitude, latitude)`` of the patch centre.
        patch_size: Width and height of the patch, in pixels.

    Returns:
        The patch as a plain (unstructured) array; in this notebook a
        512-pixel patch comes back as uint8 with shape (512, 512, 1).
    """
    structured_patch = get_patch(get_label_image(), lonlat, patch_size, scale=10)
    return structured_to_unstructured(structured_patch)

In [191]:

def get_testing_data(
    lonlat: tuple[float, float],
    patch_size: int = 128,
    year: int = 2020,
) -> tuple[np.ndarray, np.ndarray]:
    """Fetch the input patch and the matching label patch around a point.

    Both patches are squares of ``patch_size`` x ``patch_size`` pixels requested
    for the same ``lonlat``.

    Args:
        lonlat (tuple[float, float]): ``(longitude, latitude)`` of the point the
            patches are requested around.
        patch_size (int, optional): Side length of the square, in pixels.
            Defaults to 128.
        year (int, optional): Year of the imagery used for the input patch.
            Defaults to 2020, the value that used to be hard-coded.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(inputs, labels)`` as returned by
        ``get_input_patch`` and ``get_label_patch``. In this notebook a
        512-pixel request gives float32 inputs of shape (512, 512, 12) and
        uint8 labels of shape (512, 512, 1).
    """
    return (
        get_input_patch(year, lonlat, patch_size),
        get_label_patch(lonlat, patch_size),
    )

# country_latLon is stored as (lat, lon); reverse it to the (lon, lat) order used for patches.
point = country_latLon[::-1]
inputs, labels = get_testing_data(point, patch_size=512)
print(f"inputs : {inputs.dtype} {inputs.shape}")
print(f"labels : {labels.dtype} {labels.shape}")



inputs : float32 (512, 512, 12)
labels : uint8 (512, 512, 1)


In [192]:
# Spot-check a single label pixel: row 0, column 500 of the label patch.
labels[0][500]

array([0], dtype=uint8)

In [161]:
def serialize(inputs: np.ndarray, labels: np.ndarray) -> bytes:
    """Pack an (inputs, labels) pair into a serialized ``tf.train.Example``.

    Each array is serialized as a tensor and stored as a single-element bytes
    feature, under the keys ``"inputs"`` and ``"labels"``.

    Args:
        inputs: Input array to store under ``"inputs"``.
        labels: Label array to store under ``"labels"``.

    Returns:
        The serialized example as bytes.
    """
    def _bytes_feature(array: np.ndarray) -> tf.train.Feature:
        # One serialized tensor per feature.
        payload = tf.io.serialize_tensor(array).numpy()
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[payload]))

    feature_map = {
        "inputs": _bytes_feature(inputs),
        "labels": _bytes_feature(labels),
    }
    record = tf.train.Example(features=tf.train.Features(feature=feature_map))
    return record.SerializeToString()

# Serialize the patch pair fetched earlier and report the size of the record.
serialized = serialize(inputs, labels)
print(f"serialized: {len(serialized)} bytes")

serialized: 868448 bytes


2023-03-17 00:27:17.114852: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
