In [1]:
from IPython.display import display
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd

import datetime
from pathlib import Path

import intake
import rasterio
import json
import pickle

import ee
from geemap import geemap

In [2]:
ee.Authenticate()
ee.Initialize(project='sentinel-treeclassification')

In [3]:
class SentinelGetter:
    def mask_s2_clouds(self, image):
      # Quality assessment with resolution in meters
      qa = image.select('QA60')
      # Bits 10 and 11 are clouds and cirrus, respectively.
      cloud_bit_mask = 1 << 10
      cirrus_bit_mask = 1 << 11
      # Both flags should be set to zero, indicating clear conditions.
      mask = (
          qa.bitwiseAnd(cloud_bit_mask)
          .eq(0)
          .And(qa.bitwiseAnd(cirrus_bit_mask).eq(0))
      )
      return image.updateMask(mask)

    def get_image(self, bbox, start_date, end_date, selected_bands):
        image = (
            ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
            .filterDate(start_date, end_date)
            # Pre-filter to get less cloudy granules.
            .map(self.mask_s2_clouds)
            .select(selected_bands)
            .mean()
            .clip(bbox)
        )
        return image

In [23]:
catalog = intake.open_catalog(Path('../catalog.yml'))
source = getattr(catalog, 'treesat')
gdf = source.read()[source.metadata['usecols']]

total_bounds = gdf.buffer(100, cap_style=3).to_crs(epsg=4326).geometry.total_bounds
bbox = ee.Geometry.BBox(*total_bounds)

gdf = gdf.to_crs(epsg=4326)

In [27]:
target = source.metadata['categories']['generic']
gdf[target] = gdf[target].astype('category')

category_map = dict(zip(gdf[target].cat.codes, gdf[target]))

gdf[target] = gdf[target].cat.codes

In [5]:
test_date = datetime.datetime(2020, 12, 17)
start_date = test_date.replace(day=1)
end_date = start_date.replace(month=start_date.month%12 + 1, year=start_date.year + start_date.month//12 )
selected_bands = ['B[2-8]', 'B8A', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B']
sentinel_image = SentinelGetter().get_image(bbox, start_date, end_date, selected_bands)

In [6]:
feature_collection = geemap.geopandas_to_ee(gdf.sample(10))

In [7]:
map = geemap.Map()
map.addLayerControl()
map.setCenter(total_bounds[0::2].sum()/2, total_bounds[1::2].sum()/2, 8)
url = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'
map.add_tile_layer(url, name='Google Map', attribution='Google')
map.addLayer(feature_collection, {}, "geopandas to ee")

In [88]:
%%time
dfs = []
for chunk in tqdm(np.array_split(gdf.sample(100), gdf.sample(100).shape[0]//10)):
    feature_collection = geemap.gdf_to_ee(chunk)
    sample_regions = sentinel_image.sampleRegions(
        collection=feature_collection, scale=10, geometries=True)
    df = geemap.ee_to_gdf(sample_regions)
    # info = sample_regions.getInfo()
    # df = pd.json_normalize(info['features'])
    dfs.append(df)

  0%|          | 0/10 [00:00<?, ?it/s]

CPU times: user 357 ms, sys: 70 ms, total: 427 ms
Wall time: 16.4 s


In [89]:
df = pd.concat(dfs, ignore_index=True, sort=False)

In [91]:
dfs[0].columns

Index(['geometry', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
       'B8A', 'TCI_B', 'TCI_G', 'TCI_R', 'l2'],
      dtype='object')

In [119]:
gdfs = []
for i, row in tqdm(gdf.sample(100).iterrows(), total=gdf.sample(100).shape[0]):
    bbox = ee.Geometry.BBox(*row.geometry.bounds)
    # sample = sentinel_image.stratifiedSample(
    #     numPoints=36, classBand='B3', region=bbox, scale=10, seed=42, geometries=True)
    sample = sentinel_image.sample(
        region=bbox, scale=10,  factor, numPixels=36, seed=42, geometries=True)
    gdf = geemap.ee_to_gdf(sample)
    gdfs.append(gdf)

  0%|          | 0/100 [00:00<?, ?it/s]

Exception: Image.stratifiedSample: The class band must be integer typed.