In [37]:
import ee
import geemap
import hvplot.pandas
import hvplot.xarray
import xarray as xr
import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path
import shapely

In [2]:
# Start the interactive Earth Engine authentication flow (prompts for a verification code).
# NOTE(review): clear this cell's output before sharing the notebook so the code is not saved with it.
ee.Authenticate()

Enter verification code:  [REDACTED]



Successfully saved authorization token.


In [3]:
# Interactive geemap map centred on (40, -100) at zoom level 4; later cells add layers to it.
Map = geemap.Map(center=(40, -100), zoom=4)
# Bare expression so the notebook renders the map widget as the cell output.
Map

Map(center=[40, -100], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(ch…

In [4]:
import json
import urllib.request

def get_mrc_metadata(return_gdf=True, verbose=False):
    """Get time-series metadata from the Mekong River Commission Data Portal.

    The inventory is downloaded from the MRC API, stored as
    ``timeSeriesList.json`` in the current working directory and parsed into
    one row per time-series entry.

    Parameters
    ----------
    return_gdf : bool, default True
        If True, return a GeoDataFrame whose point geometries are built from
        the ``longitude``/``latitude`` columns (CRS EPSG:4326). If False,
        return the plain DataFrame.
    verbose : bool, default False
        If True, print the download URL and the number of datasets/stations.

    Returns
    -------
    geopandas.GeoDataFrame or pandas.DataFrame
        Metadata table with a fresh 0..n-1 index. Rows are in reverse order of
        the downloaded list.
    """
    url = r'https://api.mrcmekong.org/api/v1/ts/inventory/timeSeriesList'
    urllib.request.urlretrieve(url, 'timeSeriesList.json')

    if verbose:
        print(f'Downloaded time-series metadata from {url} .')

    # Context manager guarantees the file is closed, also for an empty list.
    with open('timeSeriesList.json') as f:
        data = json.load(f)

    # Build the frame in one go instead of growing it row by row. The list is
    # reversed so the row order stays the same as before (each new record used
    # to be prepended); order-dependent calls such as groupby().first() rely on it.
    records = [dict(dataset) for dataset in reversed(data)]
    df_metadata = pd.DataFrame(records)
    if records:
        df_metadata = df_metadata.astype({'longitude': float, 'latitude': float})

    if verbose:
        print(f'Found a total of {df_metadata.shape[0]} time-series datasets from {len(df_metadata.stationCode.unique())} stations of the MRC Data Portal.')

    if not return_gdf:
        return df_metadata

    return gpd.GeoDataFrame(
        df_metadata,
        geometry=gpd.points_from_xy(df_metadata.longitude, df_metadata.latitude),
        crs="EPSG:4326",
    )

# Fetch the MRC inventory and keep the time-series whose label contains 'DSMP'.
gdf_metadata = get_mrc_metadata(return_gdf=True)
is_dsmp_label = gdf_metadata.label.str.contains('DSMP')
gdf_metadata_dmsp = gdf_metadata.loc[is_dsmp_label]

# One row per location (first record of each group), reduced to the columns
# needed for the map layer.
station_columns = ['river', 'stationShortName', 'geometry']
gdf_stations_dsmp = (
    gdf_metadata_dmsp
    .groupby('locationIdentifier')
    .first()[station_columns]
    .set_crs('EPSG:4326')
)

Map.add_gdf(gdf_stations_dsmp, 'MRC DSMP stations', {'color': 'red'})

In [143]:
# Load data from DSMP surveys
paths_data_s = list(Path('../mrc_webscrapper/outputs/csv/Sediment Concentration/').glob('*.csv'))
paths_data_q = list(Path('../mrc_webscrapper/outputs/csv/Discharge/').glob('*.csv'))
paths_data = paths_data_q + paths_data_s

# NOTE(review): only the sediment files are read below; `paths_data`
# (discharge + sediment) is built but not used in this cell - confirm intent.
frames = []
for path in paths_data_s:
    df_temp = pd.read_csv(path, dtype={'station_code': 'str'})
    df_temp['date_utc'] = pd.to_datetime(df_temp['date'])
    # median spacing between consecutive records of this file
    df_temp['med_frq'] = np.median(np.diff(df_temp.date_utc))
    frames.append(df_temp)
# Concatenate once instead of re-copying the growing frame on every iteration.
df_data = pd.concat(frames) if frames else pd.DataFrame([])

# Stations that have at least one record whose identifier contains 'DSMP' ...
df_dsmp_records = df_data.loc[df_data.identifier.str.contains('DSMP')]
df_dsmp_stations = df_dsmp_records.groupby('station_code').first()
# ... and every record (DSMP or not) belonging to those stations.
df_data_dsmp = df_data.loc[df_data.station_code.isin(df_dsmp_stations.index)]

# Station points; the 'EPSG:4326' string replaces the deprecated {'init': ...} dict.
gdf_stations = gpd.GeoDataFrame(
    df_dsmp_stations,
    geometry=gpd.points_from_xy(df_dsmp_stations.lon, df_dsmp_stations.lat),
    crs='EPSG:4326',
)

# Attach the station geometry to every record via its station_code.
gdf_data_dsmp = gpd.GeoDataFrame(df_data_dsmp.join(gdf_stations.geometry, on='station_code'))

In [357]:
# Placeholder for optional keyword arguments (currently empty)
kwargs = dict()

# Analysis period as date strings
start_date = '2018-01-01'
end_date = '2024-01-30'

# Scene-based cloud masking:
# maximum image cloud cover percent allowed in the image collection
cld_filt_thresh = 80

# Water masking on/off
mask_water = True

# Pixel-based cloud masking, s2cloudless only (disabled):
# cld_prb_thresh = 25      # Cloud probability (%); values greater than are considered cloud
# cld_prj_dist = 1          # Maximum distance (km) to search for cloud shadows from cloud edges (based on Hollstein decision tree)

# Pixel-based cloud masking, Cloud Score+ only
qa_band = 'cs_cdf'
clear_thresh = 0.75

In [18]:
# Region of interest: geometry of the first feature in the basin GeoJSON
json_data = 'geometries/geoms.geojson'
fc_geoms = geemap.geojson_to_ee(json_data)
roi_geom = fc_geoms.first().geometry()

# Dams: CSV with X/Y coordinate columns -> point GeoDataFrame indexed by 'id'
df = pd.read_csv('geometries/3SReservoirs.csv')
dam_points = gpd.GeoSeries.from_xy(df['X'], df['Y'])
gdf_dams = (
    gpd.GeoDataFrame(df, geometry=dam_points, crs=4326)
    .drop(columns=['X', 'Y'])
    .set_index('id')
)
# Server-side copy of the dams
fc_dams = geemap.geopandas_to_ee(gdf_dams)

# define stations
# Each entry: ([longitude, latitude], station_id, station_name)
station_table = [
    ([106.39220569493087, 14.11961739545873], '430102', 'Siempang'),
    ([107.78237137912318, 12.897991717109619], '451305', 'Ban Don'),
    ([106.5278998223061, 13.553039642985812], '450101', 'Lum Phat'),
    ([107.47041987706022, 13.940093424442852], '440202', 'Pleiku'),
    ([107.10639453530615, 14.050489408398247], '440103', 'Andaung Meas'),
    ([107.44760623609672, 13.792268298200513], '440100', 'Phum Pi'),
    ([105.987503299606, 13.537303972542501], '014501', 'Stung Treng'),
]
stations_3s = ee.FeatureCollection([
    ee.Feature(
        ee.Algorithms.GeometryConstructors.Point(lon_lat),
        {'station_id': station_id, 'station_name': station_name},
    )
    for lon_lat, station_id, station_name in station_table
])

# Lake test sites: centroid of the first feature of each shapefile asset.
# They carry the same property schema (station_id, station_name) as the
# stations above so the sampling cell can read both.
# NOTE(review): station_name is set equal to station_id here - replace with a
# proper name if one exists.
lake_rhone = ee.Feature(
    ee.FeatureCollection('users/michaelbrechbuehler/Landsat_ST/shapefiles/rhone_wgs84').first().geometry().centroid(),
    {'station_id': 'Rhonesee', 'station_name': 'Rhonesee'},
)
lake_steisee = ee.Feature(
    ee.FeatureCollection('users/michaelbrechbuehler/Landsat_ST/shapefiles/steisee_wgs84').first().geometry().centroid(),
    {'station_id': 'Steinsee', 'station_name': 'Steinsee'},
)

# Active selection. It must be a FeatureCollection (not a bare Feature):
# the sampling cell calls `stations.filter(...)`, which ee.Feature does not have.
# Use `stations = stations_3s` to process the seven stations defined above instead.
stations = ee.FeatureCollection([lake_rhone])

# Optional background layer (disabled): median of low-cloud Sentinel-2 SR scenes over the ROI.
# # add S2 clear median
# filters = ee.Filter.And(
#     ee.Filter.bounds(roi_geom),
#     ee.Filter.date('2017-12-01', '2019-03-31'),
#     ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)
# )
# s2_median = ee.ImageCollection("COPERNICUS/S2_SR").filter(filters).median()

# Map.addLayer(s2_median, {'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 2000}, 'Sentinel-2 RGB')
#Map.addLayer(roi_geom, {'color': 'yellow'}, '3S Basin')

# Show the currently selected stations (red) and the dams (blue), then centre the map on the stations.
Map.addLayer(stations, {'color': 'red'}, 'Stations')
Map.add_gdf(gdf_dams, 'Dams', {'color': 'blue'})
Map.centerObject(stations)

## Get point samples at virtual stations

In [None]:
%load_ext autoreload
%autoreload 2
import functions_process as funcs_process
import functions_turbidity as funcs_turb
import functions_sampling as funcs_sampling

# Only samples whose 'roi_coverage' property exceeds 90 are exported.
coverage_filter = ee.Filter.gt('roi_coverage', 90)

# One export task per station. `station_id`/`station_name` are used instead of
# `id`/`name` so the builtin `id` is not shadowed in the notebook namespace.
for row in geemap.ee_to_df(stations).itertuples(index=False):
    station_name, station_id = row.station_name, row.station_id
    print(f'{station_name} ({station_id})')
    station = stations.filter(ee.Filter.eq('station_id', station_id))

    # get Rrs imagecolls
    ic_msi = funcs_process.load_rrs_imcoll(sensor='msi', start_date=start_date, end_date=end_date, mask_water=mask_water, bounds=station)
    ic_oli = funcs_process.load_rrs_imcoll(sensor='oli', start_date=start_date, end_date=end_date, mask_water=mask_water, bounds=station)
    ic_etm = funcs_process.load_rrs_imcoll(sensor='etm', start_date=start_date, end_date=end_date, mask_water=mask_water, bounds=station)
    ic_all = ic_msi.merge(ic_oli).merge(ic_etm)

    # compute tsm features
    ic_all = (
        ic_all
        .map(funcs_turb.calc_spm_nechad)
        .map(funcs_turb.calc_tur_nechad)
        .map(funcs_turb.calc_tur_dogliotti)
        .map(funcs_turb.calc_indices)
    )

    # sample every image at the station and flatten into one feature collection
    fc_all = ee.FeatureCollection(ic_all.map(funcs_sampling.sample_image(station))).flatten()

    # export to drive
    geemap.ee_export_vector_to_drive(
        fc_all.filter(coverage_filter),
        fileFormat='CSV',
        folder="export",
        description=f"rrs_samples_{station_name.replace(' ', '').lower()}_{station_id}",
    )

In [None]:
# Pull the filtered samples to the client and flatten them into a table.
# NOTE(review): `fc_all` comes from the sampling loop, so it holds the last station processed there.
fc_covered = fc_all.filter(ee.Filter.gt('roi_coverage', 90))
data = fc_covered.getInfo()

# One row per feature; expand the 'properties' dict into separate columns.
features_raw = pd.DataFrame.from_dict(data['features'])
properties_wide = features_raw["properties"].apply(pd.Series)
df_all = features_raw.join(properties_wide).drop(columns=['geometry', 'properties'])

# 'system:time_start' is given in milliseconds
df_all['timestamp'] = pd.to_datetime(df_all['system:time_start'], unit='ms')

## Get map over virtual station

In [775]:
def process_station(ic_rs, max_diff=1):
    """Build a per-station mapper bound to one image collection.

    The returned function takes a station FeatureCollection, joins it with
    `ic_rs` through `get_matchups` (using `max_diff`) and maps `get_sample`
    over the matched features.
    """
    def _match_and_sample(fc_station):
        matched = get_matchups(fc_station, ic_rs, max_diff)
        return matched.map(get_sample)

    return _match_and_sample

def get_matchups(fc_station, ic_rs, max_diff=1):
    """Join each station feature with its temporally closest image.

    Images are first restricted to those intersecting the station geometry.
    A feature and an image match when their 'system:time_start' values differ
    by at most `max_diff * 24 * 60 * 60 * 1000` (i.e. `max_diff` is given in
    days, the timestamps in milliseconds). The best image is stored on the
    feature under 'bestImage' and the difference under 'timeDiff'.
    """
    station_geom = ee.FeatureCollection(fc_station).geometry()
    candidates = ic_rs.filter(ee.Filter.bounds(station_geom))

    time_window = ee.Filter.maxDifference(
        difference=max_diff * 24 * 60 * 60 * 1000,
        leftField='system:time_start',
        rightField='system:time_start',
    )
    closest_image_join = ee.Join.saveBest(matchKey='bestImage', measureKey='timeDiff')

    return closest_image_join.apply(fc_station, candidates, time_window)

def get_sample(feature):
    """Sample the matched image around a feature and store the result on it.

    The image saved under 'bestImage' is reduced with a median over the
    feature geometry buffered by 200 (presumably metres, the Earth Engine
    default unit - confirm). As before, the resulting dictionary is stored
    under 'value'. The feature's previous 'value' (the measurement that came
    with the feature) would be lost by that overwrite, so it is additionally
    kept under 'value_insitu'.

    Returns the updated ee.Feature.
    """
    feature = ee.Feature(feature)
    match_img = ee.Image(feature.get('bestImage'))
    geometry = feature.geometry().buffer(200)
    # read the original measurement before 'value' is replaced below
    insitu_value = feature.get('value')
    samples_agg = match_img.reduceRegion(reducer=ee.Reducer.median(), geometry=geometry)
    return feature.set('value', samples_agg).set('value_insitu', insitu_value)

In [776]:
%load_ext autoreload
%autoreload 2
import functions_process as funcs_process
import functions_turbidity as funcs_turb
import functions_sampling as funcs_sampling

# prepare insitu data
period_start = pd.to_datetime(start_date + ' 00:00:00UTC')
period_end = pd.to_datetime(end_date + ' 00:00:00UTC')
in_period = (gdf_data_dsmp.date_utc > period_start) & (gdf_data_dsmp.date_utc < period_end)
# .copy() so the new column is written to an independent frame, not to a slice
gdf_data_dsmp_subset = gdf_data_dsmp.loc[in_period].copy()
# explicit %H:%M:%S (instead of the locale-dependent %X) to match 'HH:mm:ss' below
gdf_data_dsmp_subset['date_utc_str'] = gdf_data_dsmp_subset.date_utc.apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
gdf_data = gdf_data_dsmp_subset

# one server-side FeatureCollection per station, tagged with its station code
station_collections = []
for station in gdf_data.station_code.unique():
    gdf_data_station = gdf_data.loc[gdf_data.station_code == station]
    fc_data_station = ee.FeatureCollection(
        geemap.gdf_to_ee(
            gdf_data_station[['station_code', 'value', 'date_utc_str', 'geometry']],
            date='date_utc_str', date_format='YYYY-MM-dd HH:mm:ss',
        ).set('station', station)
    )
    station_collections.append(fc_data_station)
list_data_stations = ee.List(station_collections)

# `fc_data_stations` was referenced without being defined in this notebook;
# build it from the per-station list so the cell runs on a fresh kernel.
fc_data_stations = ee.FeatureCollection(list_data_stations)
bounds = fc_data_stations.flatten().geometry()
ic_oli = funcs_process.load_sr_imcoll(sensor='oli', start_date=start_date, end_date=end_date, bounds=bounds, watermask='qa')
ic_etm = funcs_process.load_sr_imcoll(sensor='etm', start_date=start_date, end_date=end_date, bounds=bounds, watermask='qa')
ic_msi = funcs_process.load_sr_imcoll(sensor='msi', start_date=start_date, end_date=end_date, bounds=bounds, watermask='scl')

# create matchups (per sensor: match every station to its closest image, then sample)
fc_matchups_oli = ee.FeatureCollection(list_data_stations.map(process_station(ic_oli, max_diff=1))).flatten()
fc_matchups_msi = ee.FeatureCollection(list_data_stations.map(process_station(ic_msi, max_diff=1))).flatten()
fc_matchups_etm = ee.FeatureCollection(list_data_stations.map(process_station(ic_etm, max_diff=1))).flatten()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [777]:
import time
from datetime import datetime, timedelta

def check_tasks_status(tasks):
    """Print one colour-coded status line per task and return the states.

    Parameters
    ----------
    tasks : iterable
        Objects with a ``status()`` method returning a dict that contains at
        least 'state' and 'id' (and optionally 'description').

    Returns
    -------
    list of str
        The 'state' of every task, in input order.
    """
    reset = '\033[0m'
    green, orange, red = '\033[92m', '\033[93m', '\033[91m'
    # states not listed here are printed without colour
    state_colors = {
        'COMPLETED': green,
        'RUNNING': orange,
        'READY': orange,
        'FAILED': red,
        'CANCEL_REQUESTED': red,
        'CANCELLED': red,
    }

    states = []
    for task in tasks:
        status = task.status()
        state = status['state']
        task_id = status['id']
        # timestamp truncated to seconds: 'YYYY-MM-DD HH:MM:SS'
        time_now = str(datetime.now())[:19]
        color = state_colors.get(state, reset)
        status_msg = f"[{time_now}] Task {task_id}" \
                     f"({status.get('description', 'No description')}): {color + state + reset}"
        print(status_msg)
        states.append(state)
    return states

# Start one Drive table export per sensor.
tasks = []
for sensor, fc in [('oli', fc_matchups_oli), ('etm', fc_matchups_etm), ('msi', fc_matchups_msi)]:
    task = ee.batch.Export.table.toDrive(
        collection=fc,
        description=f'TSS_export_{sensor}',
        folder='Earth Engine',
    )
    task.start()
    tasks.append(task)

# States after which a task will not make further progress. 'CANCELLED' is
# included so the loop also ends once a cancel request has gone through.
finished_states = {'COMPLETED', 'FAILED', 'CANCEL_REQUESTED', 'CANCELLED'}

# Poll every 30 s until every task is finished; the states returned by
# check_tasks_status are reused instead of querying each task a second time.
all_completed = False
while not all_completed:
    states = check_tasks_status(tasks)
    all_completed = all(state in finished_states for state in states)
    if not all_completed:
        time.sleep(30)

print("All export tasks finished.")

[2024-03-22 16:42:32] Task HRRAP2YKOLHJPMIUPA3PIS2T(TSS_export_oli): [93mREADY[0m
[2024-03-22 16:42:32] Task JE4XCH6OWERCEWM7YTMGA5B6(TSS_export_etm): [93mREADY[0m
[2024-03-22 16:42:33] Task PX26T36EGD5MB2RKL4AJLH5U(TSS_export_msi): [93mREADY[0m
[2024-03-22 16:43:04] Task HRRAP2YKOLHJPMIUPA3PIS2T(TSS_export_oli): [93mRUNNING[0m
[2024-03-22 16:43:04] Task JE4XCH6OWERCEWM7YTMGA5B6(TSS_export_etm): [92mCOMPLETED[0m
[2024-03-22 16:43:05] Task PX26T36EGD5MB2RKL4AJLH5U(TSS_export_msi): [93mRUNNING[0m
[2024-03-22 16:43:36] Task HRRAP2YKOLHJPMIUPA3PIS2T(TSS_export_oli): [93mRUNNING[0m
[2024-03-22 16:43:36] Task JE4XCH6OWERCEWM7YTMGA5B6(TSS_export_etm): [92mCOMPLETED[0m
[2024-03-22 16:43:37] Task PX26T36EGD5MB2RKL4AJLH5U(TSS_export_msi): [93mRUNNING[0m
[2024-03-22 16:44:08] Task HRRAP2YKOLHJPMIUPA3PIS2T(TSS_export_oli): [92mCOMPLETED[0m
[2024-03-22 16:44:08] Task JE4XCH6OWERCEWM7YTMGA5B6(TSS_export_etm): [92mCOMPLETED[0m
[2024-03-22 16:44:09] Task PX26T36EGD5MB2RKL4AJLH5U(

In [None]:
%load_ext autoreload
%autoreload 2
import functions_process as funcs_process
import functions_turbidity as funcs_turb
import functions_sampling as funcs_sampling

# Rrs image collections per sensor, all for the same period and bounds
rrs_kwargs = dict(start_date=start_date, end_date=end_date, bounds=bounds)
ic_msi = funcs_process.load_rrs_imcoll(sensor='msi', **rrs_kwargs)
ic_oli = funcs_process.load_rrs_imcoll(sensor='oli', **rrs_kwargs)
ic_etm = funcs_process.load_rrs_imcoll(sensor='etm', **rrs_kwargs)
ic_all = ic_msi.merge(ic_oli).merge(ic_etm)

# Apply the turbidity / SPM / index functions to every image, in this order
feature_steps = (
    funcs_turb.calc_spm_nechad,
    funcs_turb.calc_tur_nechad,
    funcs_turb.calc_tur_dogliotti,
    funcs_turb.calc_indices,
)
for feature_step in feature_steps:
    ic_all = ic_all.map(feature_step)

In [None]:
# CRS of the first band of the first MSI image (not passed to the export below)
crs = ic_msi.first().select(0).projection().crs()
scale = 30

# wxee convert to xarray
#ds_msi = ic_msi.select('B4').limit(25).wx.to_xarray(scale=scale, region=bounds)

# geemap export: MSI band B4 goes to the MSI folder (was 'export/oli'), and the
# `scale` defined above is passed instead of a second literal 30.
geemap.ee_export_image_collection_to_drive(
    ee.ImageCollection(ic_msi).select('B4'),
    folder='export/msi',
    maxPixels=200000000,
    region=bounds,
    scale=scale,
)

In [None]:
# Export red bands to geotiffs
# (collection, red band name, Drive folder) per sensor. Each sensor is written
# to its own folder; previously ETM+ went to 'export/msi' and MSI to 'export/oli'.
# NOTE(review): the original referenced imcoll_etm / imcoll_oli / imcoll_msi,
# which are not defined anywhere in this notebook; the ic_* collections loaded
# above are used instead - confirm these are the intended inputs.
red_band_exports = [
    (ic_etm, 'B3', 'export/etm'),
    (ic_oli, 'B4', 'export/oli'),
    (ic_msi, 'B4', 'export/msi'),
]
for imcoll, red_band, drive_folder in red_band_exports:
    geemap.ee_export_image_collection_to_drive(
        ee.ImageCollection(imcoll).select(red_band),
        folder=drive_folder,
        maxPixels=200000000,
        region=bounds,
        scale=30,
    )

In [None]:
# Scratch expression: bounding box of a buffer of 5000 around the first dam in gdf_dams.
# The result is only displayed, not assigned to a variable.
ee.Algorithms.GeometryConstructors.Point(gdf_dams.iloc[0].geometry.coords[0]).buffer(5000).bounds()

In [None]:
# Plot the `is_cloud` variable of the first time step.
# NOTE(review): `ds_msi` is only assigned in a commented-out line of this notebook,
# so this cell raises NameError on a fresh kernel unless it is defined elsewhere.
ds_msi.is_cloud.isel(time=0).plot()

In [None]:
# Centre the map on the OLI image at zoom level 14.
# NOTE(review): `img_oli` is first assigned in the cell below, so this cell only
# works after that one has been run (out-of-order execution).
Map.centerObject(img_oli, 14)

In [None]:
# First image of each collection after an ascending sort on its cloud-cover property
img_etm = ic_etm.sort('CLOUD_COVER', True).first()
img_oli = ic_oli.sort('CLOUD_COVER', True).first()
img_msi = ic_msi.sort('CLOUDY_PIXEL_PERCENTAGE', True).first()

# Add the three images with default visualisation, then zoom to the MSI image.
#vis = {'bands': 'Rrs_B4', 'min': 0, 'max': 0.2}
map_layers = (
    (img_etm, 'ETM+, Rrs_B3'),
    (img_oli, 'OLI, Rrs_B4'),
    (img_msi, 'MSI, Rrs_B4'),
)
for layer_image, layer_name in map_layers:
    Map.addLayer(ee.Image(layer_image), {}, layer_name)
Map.centerObject(img_msi, 14)