In [1]:
%matplotlib widget
# %matplotlib inline
# %matplotlib qt 

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import folium
import geopandas as gpd
from shapely.geometry import shape, GeometryCollection
from pyproj import CRS
from tqdm.notebook import tqdm
from tqdm import trange
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets

import ee
ee.Initialize()

import geemap.eefolium as emap
create_map = lambda: emap.Map()

## Query water occurrence values for all ICESat-2 points

In [2]:
# DK
# df = pd.read_parquet('../data/out/icesat-2-DK.parquet')
# image = ee.Image('users/gena/eo-bathymetry/NDWI/NDWI-4521-v9') 

# NL
df = pd.read_parquet('../data/out/icesat-2-NL.parquet')
extent = [5.8062744140625, 53.36202615413913, 6.1791229248046875, 53.5092846887053]
rect = ee.Geometry.Rectangle(extent)
image = ee.ImageCollection([ 
    ee.Image('users/gena/eo-bathymetry/NDWI/NDWI-4520-v9').resample('bicubic'),
    ee.Image('users/gena/eo-bathymetry/NDWI/NDWI-4448-v9').resample('bicubic')
]).mosaic().clip(rect)

# FR
# df = pd.read_parquet('../data/out/icesat-2-FR.parquet')
# image = ee.Image('users/gena/eo-bathymetry/NDWI/NDWI-4300-v9')

In [3]:
print(image.reduceRegion(reducer=ee.Reducer.minMax(), geometry=image.geometry(), scale=90).getInfo())

{'water_score_max': 0.1594305609635043, 'water_score_min': -0.10875057600360025}


In [4]:
df.columns

Index(['track_id', 'date', 'beam', 'series', 'lon', 'lat', 'h'], dtype='object')

In [5]:
df.series.unique()

array(['Noise', 'Buffer', 'Low', 'Medium', 'High'], dtype=object)

In [6]:
BATCH_SIZE = 50000 # fails after ~90k

In [7]:
len(df) // BATCH_SIZE

48

In [8]:
# df2 = df_high.iloc[:BATCH_SIZE,:]

In [9]:
df2 = df.sample(n=BATCH_SIZE)

In [10]:
len(df)

2448528

In [11]:
# # extent = [8.46, 55.16, 8.63, 55.30]
# # extent = [8.46, 55.36, 8.62, 55.42]

# df2 = df[(df.lon > extent[0]) & (df.lon < extent[2]) & (df.lat > extent[1]) & (df.lat < extent[3])]
# df2 = df2[(df2.series != 'Noise')]
# # df2 = df2.copy()

# len(df2)

In [12]:
# df['lat_'] = df.lon
# df['lon'] = df.lat
# df['lat'] = df.lat_
# df.to_parquet('../data/out/icesat-2-NL.parquet')

In [13]:
df

Unnamed: 0,track_id,date,beam,series,lon,lat,h
0,252,2019-01-13,gt3r,Noise,5.824986,53.490235,18.572187
1,252,2019-01-13,gt3r,Noise,5.824943,53.489975,17.784880
2,252,2019-01-13,gt3r,Noise,5.824911,53.489753,91.943270
3,252,2019-01-13,gt3r,Noise,5.824727,53.488632,66.256035
4,252,2019-01-13,gt3r,Noise,5.824203,53.485430,78.600680
...,...,...,...,...,...,...,...
2448523,1357,2020-03-24,gt1l,High,5.892009,53.508791,39.567448
2448524,1357,2020-03-24,gt1l,High,5.892008,53.508798,39.205154
2448525,1357,2020-03-24,gt1l,High,5.892007,53.508804,39.256588
2448526,1357,2020-03-24,gt1l,High,5.892003,53.508830,39.418655


In [14]:
df2 = df[(df.series != 'Noise')].sample(n=BATCH_SIZE)

In [15]:
coords = list(zip(df2.lon, df2.lat))

In [16]:
len(df2)

50000

In [17]:
len(coords)

50000

In [18]:
g = ee.Geometry.MultiPoint(coords) # visualize using multi-point instead of feature collection - faster
pointsRGB = ee.FeatureCollection(g).style(pointSize=2, color='00000000', fillColor='ffeda0')
pointsRGB = pointsRGB.select(0).mask().focal_max(3).selfMask().visualize(palette=['000000'], opacity=0.5).blend(pointsRGB)

extent = ee.Geometry.Rectangle(coords=[df.lon.min(), df.lat.min(), df.lon.max(), df.lat.max()]) 
extent = ee.Geometry.LineString(extent.coordinates().get(0))

imageRGB = image.updateMask(image.gt(-0.09)) 

m = create_map()
m.addLayer(imageRGB, { 'min': -0.10, 'max': 0.15, 'palette': ['lightblue', 'black'] }, 'water occurrence')
m.addLayer(extent, { 'color': 'white' }, 'rect')
m.addLayer(pointsRGB, {}, 'sampled points')
m.setCenter(df.lon.mean(), df.lat.mean(), 11)
m.addLayerControl()
m

In [19]:
# features = ee.List(coords).map(lambda o: ee.Feature(ee.Geometry.Point(coords=o)))
# features = ee.FeatureCollection(features)
# features.size().getInfo()

In [20]:
# extent = ee.Geometry.Rectangle(coords=[df.lon.min(), df.lat.min(), df.lon.max(), df.lat.max()]).buffer(1000, 100)
# extent_image = ee.Image(0).paint(ee.FeatureCollection(extent), 1)

# image = image.mask(extent_image)

In [21]:
# values = image.unmask(-999, False).reduceRegions(collection=features, reducer=ee.Reducer.first().setOutputs(['water_occurrence']), scale=10, tileScale=1)
# results = values.aggregate_array('water_occurrence').getInfo()

In [22]:
# values.toList(1, 0).get(0).getInfo()

In [23]:
# len(results)

In [24]:
# df2['water_occurrence'] = results

In [25]:
# len(df)

In [26]:
results = []

for i in tqdm(range(0, len(df), BATCH_SIZE)):
    coords = list(zip(df.lon.values[i:(i+BATCH_SIZE)], df.lat.values[i:(i+BATCH_SIZE)]))
    
    features = ee.List(coords).map(lambda o: ee.Feature(ee.Geometry.Point(coords=o)))
    features = ee.FeatureCollection(features)
    
    values = image.unmask(-999, False).reduceRegions(collection=features, reducer=ee.Reducer.first().setOutputs(['water_occurrence']), scale=10, tileScale=1)
    results = results + values.aggregate_array('water_occurrence').getInfo()

HBox(children=(FloatProgress(value=0.0, max=49.0), HTML(value='')))




In [27]:
len(results)

2448528

In [28]:
df['water_occurrence'] = results

In [29]:
df

Unnamed: 0,track_id,date,beam,series,lon,lat,h,water_occurrence
0,252,2019-01-13,gt3r,Noise,5.824986,53.490235,18.572187,0.147138
1,252,2019-01-13,gt3r,Noise,5.824943,53.489975,17.784880,0.147032
2,252,2019-01-13,gt3r,Noise,5.824911,53.489753,91.943270,0.146992
3,252,2019-01-13,gt3r,Noise,5.824727,53.488632,66.256035,0.146083
4,252,2019-01-13,gt3r,Noise,5.824203,53.485430,78.600680,0.147541
...,...,...,...,...,...,...,...,...
2448523,1357,2020-03-24,gt1l,High,5.892009,53.508791,39.567448,0.144328
2448524,1357,2020-03-24,gt1l,High,5.892008,53.508798,39.205154,0.144328
2448525,1357,2020-03-24,gt1l,High,5.892007,53.508804,39.256588,0.144328
2448526,1357,2020-03-24,gt1l,High,5.892003,53.508830,39.418655,0.144328


In [30]:
df[df.water_occurrence == -999] = np.nan

In [31]:
fig, ax = plt.subplots()
df.water_occurrence.hist(ax=ax, bins=100)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x211a298b348>

In [245]:
# plt.close('all')
fig, ax = plt.subplots(figsize=(10, 3))
hmin = 45
hmax = 49
# df3 = df[(df.h > hmin) & (df.h < hmax)]
df3 = df

df4 = df3[df3.series == 'High']
ax.plot(df4.water_occurrence, df4.h, 'k.', markersize=0.5, alpha=0.2)

df4 = df3[df3.series == 'Medium']
ax.plot(df4.water_occurrence, df4.h, 'r.', markersize=1)

df4 = df3[df3.series == 'Buffer']
ax.plot(df4.water_occurrence, df4.h, 'b.', markersize=1)

ax.set_xlabel('water occurrence')
ax.set_ylabel('elevation (ICESat-2)')
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
df2 = df

In [33]:
df2

Unnamed: 0,track_id,date,beam,series,lon,lat,h,water_occurrence
0,252,2019-01-13,gt3r,Noise,5.824986,53.490235,18.572187,0.147138
1,252,2019-01-13,gt3r,Noise,5.824943,53.489975,17.784880,0.147032
2,252,2019-01-13,gt3r,Noise,5.824911,53.489753,91.943270,0.146992
3,252,2019-01-13,gt3r,Noise,5.824727,53.488632,66.256035,0.146083
4,252,2019-01-13,gt3r,Noise,5.824203,53.485430,78.600680,0.147541
...,...,...,...,...,...,...,...,...
2448523,1357,2020-03-24,gt1l,High,5.892009,53.508791,39.567448,0.144328
2448524,1357,2020-03-24,gt1l,High,5.892008,53.508798,39.205154,0.144328
2448525,1357,2020-03-24,gt1l,High,5.892007,53.508804,39.256588,0.144328
2448526,1357,2020-03-24,gt1l,High,5.892003,53.508830,39.418655,0.144328


In [34]:
df2.beam.unique()

array(['gt3r', 'gt3l', nan, 'gt2r', 'gt2l', 'gt1r', 'gt1l'], dtype=object)

In [35]:
tracks = df.track_id.unique()
tracks

array(['252', nan, '412', '694', '1357'], dtype=object)

In [36]:
for t in tracks:
    print(t, df[df.track_id == t].date.unique())

252 ['2019-01-13' '2019-04-14']
nan []
412 ['2018-10-25' '2019-01-24' '2019-04-25' '2019-10-23' '2020-01-22']
694 ['2019-02-11' '2019-05-13' '2019-08-12' '2019-11-11' '2020-02-10']
1357 ['2018-12-26' '2019-03-27' '2019-06-26' '2019-09-24' '2019-12-24'
 '2020-03-24']


In [37]:
df.date.unique()

array(['2019-01-13', nan, '2019-04-14', '2018-10-25', '2019-01-24',
       '2019-04-25', '2019-10-23', '2020-01-22', '2019-02-11',
       '2019-05-13', '2019-08-12', '2019-11-11', '2020-02-10',
       '2018-12-26', '2019-03-27', '2019-06-26', '2019-09-24',
       '2019-12-24', '2020-03-24'], dtype=object)

In [38]:
tqdm.pandas()

  from pandas import Panel


In [39]:
# hmin = 45
# hmax = 49
# df3 = df[(df.h > hmin) & (df.h < hmax)].copy()
df3 = df

df3['filter'] = df3.progress_apply(lambda row: f"{row['track_id']}, {row['beam']}, {row['series']}", axis=1).values


HBox(children=(FloatProgress(value=0.0, max=2448528.0), HTML(value='')))




In [258]:
df3

Unnamed: 0,track_id,date,beam,series,lon,lat,h,water_occurrence,filter
0,252,2019-01-13,gt3r,Noise,5.824986,53.490235,18.572187,0.147138,"252, gt3r, Noise"
1,252,2019-01-13,gt3r,Noise,5.824943,53.489975,17.784880,0.147032,"252, gt3r, Noise"
2,252,2019-01-13,gt3r,Noise,5.824911,53.489753,91.943270,0.146992,"252, gt3r, Noise"
3,252,2019-01-13,gt3r,Noise,5.824727,53.488632,66.256035,0.146083,"252, gt3r, Noise"
4,252,2019-01-13,gt3r,Noise,5.824203,53.485430,78.600680,0.147541,"252, gt3r, Noise"
...,...,...,...,...,...,...,...,...,...
2448523,1357,2020-03-24,gt1l,High,5.892009,53.508791,39.567448,0.144328,"1357, gt1l, High"
2448524,1357,2020-03-24,gt1l,High,5.892008,53.508798,39.205154,0.144328,"1357, gt1l, High"
2448525,1357,2020-03-24,gt1l,High,5.892007,53.508804,39.256588,0.144328,"1357, gt1l, High"
2448526,1357,2020-03-24,gt1l,High,5.892003,53.508830,39.418655,0.144328,"1357, gt1l, High"


In [43]:
filters = df3.sort_values(['track_id', 'beam', 'series'])['filter'].drop_duplicates().values


In [57]:
filters = [f for f in filters if 'High' in f]

In [58]:
filters

['1357, gt1l, High',
 '1357, gt1r, High',
 '1357, gt2l, High',
 '1357, gt2r, High',
 '1357, gt3l, High',
 '1357, gt3r, High',
 '252, gt1l, High',
 '252, gt1r, High',
 '252, gt2l, High',
 '252, gt2r, High',
 '252, gt3l, High',
 '252, gt3r, High',
 '412, gt1l, High',
 '412, gt1r, High',
 '412, gt2l, High',
 '412, gt2r, High',
 '412, gt3l, High',
 '412, gt3r, High',
 '694, gt1l, High',
 '694, gt1r, High',
 '694, gt2l, High',
 '694, gt2r, High',
 '694, gt3l, High',
 '694, gt3r, High']

In [40]:
df.columns

Index(['track_id', 'date', 'beam', 'series', 'lon', 'lat', 'h',
       'water_occurrence', 'filter'],
      dtype='object')

In [62]:
hmin = 38
hmax = 44

df3 = df[(df.series != 'Noise') & (df.series != 'Medium')]
df3 = df3[(df3.h > hmin) & (df3.h < hmax)]



@interact(f=filters)
def show(f=filters[0]):
    plt.close('all')
    fig, ax = plt.subplots(figsize=(13, 5))

    # draw all
    ax.plot(df3.water_occurrence, df3.h, 'k.', markersize=0.5, alpha=0.1)

    # draw only specific track/beam/series
    df4 = df3[df3['filter'] == f]
    ax.plot(df4.water_occurrence, df4.h, 'r.', markersize=1, alpha=0.1)

    ax.set_xlabel('water occurrence')
    ax.set_ylabel('elevation (ICESat-2)')
    plt.tight_layout()

interactive(children=(Dropdown(description='f', options=('1357, gt1l, High', '1357, gt1r, High', '1357, gt2l, …