### Notebook for mapping wetland surface water dynamics using Sentinel-2 time-series data in combination with the Water Change Tracking algorithm (Chen et al., 2020)

In [None]:
import os
from glob import glob
import rasterio as rio
import numpy as np
from tqdm import tqdm,tqdm_notebook
from rasterio.plot import reshape_as_image
import re
import pandas as pd
import geopandas as gpd
from rasterio.features import sieve

from python.misc import compute_index, compute_ipm,strat_rand_sampling, calc_acc, plot_acc
from python.wct import compute_cluster, compute_rws,compute_mnws,render_wcf

import seaborn as sns
from rasterio.features import rasterize
import matplotlib.pyplot as plt

# I/O layout: imagery lives in <parent of cwd>/image_data, outputs in its results/ subfolder
parent_dir = os.path.join(os.path.abspath(os.pardir), 'image_data')
results_dir = os.path.join(parent_dir, 'results')


#### A. Computing invalid pixel masks (cloud, cloud shadow, land vegetation)

In [None]:
%%time

#mosaics and their acquisition dates (YYYYMMDD parsed from the file path).
#glob() returns files in arbitrary order, so sort by date: the month -> reference
#mosaic pairing below is purely positional and needs chronological order.
mosaics = sorted(glob(f'{parent_dir}/images/*.tif'),
                 key=lambda path: re.findall(r"(\d{8})", path)[0])
dates = [re.findall(r"(\d{8})", file)[0] for file in mosaics]

#reference mosaics, listed chronologically and zipped positionally with the
#unique year-months (YYYYMM) found in the series
ref_dates = [20170119,20170119,20170327,20170327,20170615,20170615,20170720,20170824,20171008,20171008,20171117,20171117]
ref_mosaics = [mos for date in ref_dates for mos in mosaics if str(date) in mos ]
months = pd.to_datetime(dates).strftime('%Y%m').unique().tolist()
ref_dict = dict(zip(months,ref_mosaics))

#data mask file
data_mask_file = os.path.join(results_dir,'misc','data_mask.tif')

#make sure the output folder exists before the first write
out_dir = os.path.join(results_dir,'invalid_pixel_masks')
os.makedirs(out_dir, exist_ok=True)

with rio.open(data_mask_file) as src_data_mask:
    profile = src_data_mask.profile.copy()
    data_mask = src_data_mask.read()

    #loop over mosaic files
    for file, date in tqdm(zip(mosaics, dates), total=len(mosaics), position=0, leave=True):
        #reference mosaic of the month whose YYYYMM key occurs in the file path
        ref_file = [ref for month, ref in ref_dict.items() if month in file][0]

        with rio.open(file) as src,rio.open(ref_file) as src_ref:
            nodata = src.nodata

            #target image: read once, clamp negatives to 0, blank pixels outside the data mask
            tar_img = src.read()
            tar_img = np.where(tar_img<0,0,tar_img).astype(rio.int16)
            tar_img = np.where(data_mask==1,tar_img,nodata).astype(rio.int16)

            #reference image: same treatment (the target's nodata value is reused)
            ref_img = src_ref.read()
            ref_img = np.where(ref_img<0,0,ref_img).astype(rio.int16)
            ref_img = np.where(data_mask==1,ref_img,nodata).astype(rio.int16)

            #additional mask based on RWS region
            mndwi_img = compute_index(tar_img[1],tar_img[9],'MNDWI')
            mgrn_img = (tar_img[[1,2,7]].astype(np.float32)/10000).min(0)
            rws_region = compute_rws(mndwi_img,mgrn_img,thr='otsu')
            #NOTE(review): target uses band index 9 but reference uses index 10 as the
            #last band - confirm this asymmetry is what compute_ipm expects
            cl_masks = compute_ipm(tar_img[[0,1,2,7,9]],ref_img[[0,1,2,7,10]]).astype(rio.int8)
            #keep mask values only on valid pixels that are outside the RWS region
            cl_masks = np.where((tar_img[0]!=nodata)&(rws_region!=1),cl_masks,0).astype(rio.int8)

            #write to new geotiff
            profile.update({'dtype':cl_masks.dtype,'nodata':0,'count':1})
            outf = os.path.join(out_dir,f'invalid_{date}.tif')
            with rio.open(outf ,'w',**profile) as dst:
                dst.write_band(1, cl_masks)
                dst.set_band_description(1, f'invalid_{date}')

#### B. Computing Minimum Normalized Water Score (MNWS) images

In [None]:
%%time

#MNWS computation: one single-band MNWS raster is written per mosaic
mosaics = glob(f'{parent_dir}/images/*.tif')
dates = [re.findall(r"(\d{8})", path)[0] for path in mosaics]

data_mask_file = os.path.join(results_dir,'misc','data_mask.tif')

with rio.open(data_mask_file) as src_data_mask:

    data_mask = src_data_mask.read()

    for i, file in enumerate(tqdm(mosaics, position=0, leave=True)):

        with rio.open(file) as src:
            nodata = src.nodata
            profile = src.profile.copy()
            raw = src.read()

            #negative values become 0, then pixels outside the data mask
            #(based on Sentinel-2 missing data strips) are set to nodata
            clamped = np.where(raw<0,0,raw)
            img = np.where(data_mask==1,clamped,nodata).astype(rio.int16)

            #water sample clusters, taken from the RWS region only
            mndwi_img = compute_index(img[1],img[9],'MNDWI')
            mgrn_img = np.min(img[[1,2,7]].astype(np.float32)/10000, axis=0)
            rws_region = compute_rws(mndwi_img,mgrn_img,thr='otsu')
            rws_img = np.where(rws_region==1,img,0)
            cluster_img = compute_cluster(rws_img[[0,1,2]],k=8)

            #MNWS image, with nodata carried over from the first band
            mnws_img = compute_mnws(img[[0, 1, 2, 7, 9, 10]],cluster_img)
            mnws_img = np.where(img[0]==nodata,nodata,mnws_img).astype(rio.float32)

            #export MNWS image as a single-band raster
            profile.update(nodata=nodata,count=1,dtype=mnws_img.dtype)
            outf = os.path.join(results_dir,'mnws_images',f'rws_mnws_{dates[i]}.tif')
            with rio.open(outf,'w',**profile) as dst:
                dst.write(mnws_img, 1)

        

#### C. Computing Sentinel-2 based Dynamic Water Map (S2-DWM)

In [None]:
%%time

#NOTE(review): render_wcf presumably pairs MNWS and mask rasters by position, so both
#lists are sorted to line up by date regardless of glob order - TODO confirm
mnws_files = sorted(glob(f'{parent_dir}/results/mnws_images/rws_mnw*.tif'))
cl_mask_files = sorted(glob(f'{parent_dir}/results/invalid_pixel_masks/invalid*.tif'))

#misc rasters live in results_dir/misc, as in the cells above; the earlier f-string
#glued abspath("..") and "results" together without a path separator
upland_file = os.path.join(results_dir,'misc','upland_gte30.tif')

#compute water coverage frequency map
wf_rws,water_sum,profile = render_wcf(mnws_files,cl_mask_files,upland_file,thr=3,dec=2)

#reclassify to water seasonality classes
data_mask_file = os.path.join(results_dir,'misc','data_mask.tif')

with rio.open(data_mask_file) as src_data_mask:

    #used to assign non-water pixels: valid non-water pixels get the mask value, the rest 0
    data_mask = src_data_mask.read(1)

    #reclassification of WF image; the three ranges are mutually exclusive,
    #so summing them yields a single class code per pixel
    dwm = wf_rws.copy()
    wetland = ((dwm >0)&(dwm <3))*2     #wetland
    sw = ((dwm >=3)&(dwm <=9))*3        #seasonal water
    pw = (dwm>9)*4                      #permanent water

    dwm_sum = np.array([wetland,sw,pw]).sum(0)
    dwm_out = np.where(dwm_sum>0,dwm_sum,data_mask).astype(rio.uint8)

    #sieve data (size=30, 8-connectivity)
    dwm_out_sieved = sieve(dwm_out,size=30,connectivity=8)

    #export rasters
    out_wf = os.path.join(results_dir,'waterfreq.tif')
    profile.update({'dtype':wf_rws.dtype,'nodata':0,'count':1})
    with rio.open(out_wf,'w',**profile) as dst:
        dst.write_band(1,wf_rws)

    out_water_sum = os.path.join(results_dir,'watersum.tif')
    profile.update({'dtype':rio.uint8,'nodata':0,'count':1})
    with rio.open(out_water_sum,'w',**profile) as dst:
        dst.write_band(1,water_sum.astype(rio.uint8))

    out_dwm = os.path.join(results_dir,'dwm.tif')
    profile.update({'dtype':rio.uint8,'nodata':0,'count':1})
    with rio.open(out_dwm ,'w',**profile) as dst:
        dst.write_band(1,dwm_out_sieved)


#### D. Performing stratified random sampling based on S2-DWM

In [None]:
#create stratified random sample sites
file = os.path.join(results_dir,'dwm.tif')
gdf,block_img,block_profile = strat_rand_sampling(file=file,size=13,vstride_nr = 4,nodat=0,seed=42)

#add class description.
#unique() returns codes in order of first appearance, so sort them before pairing
#with the names; otherwise classes are mislabelled when samples are not ordered by class
gdf['desc'] = None
for c_code,c_desc in zip(sorted(gdf['class'].unique()),['Nw','Wl','Sw','Pw']):
    gdf.loc[gdf['class']==c_code,'desc'] = c_desc

#export sample points (raw)
gdf.to_file('./data//samples/stratified_random_samples_raw.geojson',driver="GeoJSON")

#export image block
with rio.open('./data//samples/sub_areas.tif','w',**block_profile) as dst:
    dst.write_band(1,block_img)

#### E. Accuracy assessment
 * Sample points created in the previous step were used to evaluate the map accuracies of S2-DWM and JRC-GSW-S
 * Note that the sample points were relocated because several points were located ambiguously
 * Labelling was done using the AcATaMa QGIS plugin and Sentinel Hub EO Browser
 * A confidence rate was included for each sample point

In [None]:
#S2-DWM and JRC-GSW-S
dwm_file =  os.path.join(results_dir,'dwm.tif')
jrc_file = os.path.join(results_dir,'misc','JRC_GSW_S_2017masked.tif')

#sample points (relocated and labelled)
classified_files = glob("./data/samples/*classified.geojson")
if not classified_files:
    raise FileNotFoundError("no '*classified.geojson' file found in ./data/samples/")
sample_points = gpd.read_file(classified_files[0])

#read the map value under each sample point (centroid of its geometry)
with rio.open(dwm_file) as src_dwm,rio.open(jrc_file) as src_jrc:
    centroids = sample_points['geometry'].centroid
    coords = list(zip(centroids.x,centroids.y))
    sample_points['dwm_label'] = [val[0] for val in src_dwm.sample(coords)]
    #JRC codes are shifted by one before comparison with the true labels
    sample_points['gsw_label'] = [val[0]+1 for val in src_jrc.sample(coords)]


#NOTE: pd.concat takes `axis` as a keyword; passing it positionally fails on pandas 2.x

# S2-DWM all classes
acc_df,con_mat = calc_acc(sample_points['true_label'],sample_points['dwm_label'])
# plot_acc(acc_df,con_mat,sample_points['true_name'].unique(),0.73,'(a.) S2-DWM four classes')
pd.concat([acc_df,con_mat],axis=1).to_csv('./data/samples/acc_conmat/s2_dwm_4classes.csv')

#s2-DWM nw, sw (+wetland) and pw: class 2 is merged into class 3
acc_df,con_mat = calc_acc(sample_points['true_label'].replace(2,3),sample_points['dwm_label'].replace(2,3))
# plot_acc(acc_df,con_mat,sample_points['true_name'].unique()[[0,2,3]],0.85,'(b.) S2-DWM three classes')
pd.concat([acc_df,con_mat],axis=1).to_csv('./data/samples/acc_conmat/s2_dwm_3classes.csv')

#JRC-GSW-S nw , sw and pw: class 2 is merged into class 1
acc_df,con_mat = calc_acc(sample_points['true_label'].replace(2,1),sample_points['gsw_label'].replace(2,1))
# plot_acc(acc_df,con_mat,sample_points['true_name'].unique()[[0,2,3]],0.85,'(c.) JRC-GSW-S three classes')
pd.concat([acc_df,con_mat],axis=1).to_csv('./data/samples/acc_conmat/jrc_gsw_s_3classes.csv')

#### F. Computing latitudinal and longitudinal surface water areas

In [None]:
#compute lat lon surface water area

dwm_file =  os.path.join(results_dir,'dwm.tif')
jrc_file = os.path.join(results_dir,'misc','JRC_GSW_S_2017masked.tif')


def _transect_area(layer, class_codes, axis, step):
    """Count pixels of `class_codes` along every `step`-th row or column of `layer`.

    axis=1 samples rows and counts across each row (latitudinal profile);
    axis=0 samples columns and counts down each column (longitudinal profile).
    Counts are divided by 100 (plotted as ha; presumably 10 m pixels - TODO confirm).
    """
    sampled = layer[::step, :] if axis == 1 else layer[:, ::step]
    return np.isin(sampled, class_codes).sum(axis=axis) / 100


def _transect_table(columns, positions, out_csv):
    """Assemble profile columns into a frame indexed by position/100, add 'km', write CSV."""
    table = pd.DataFrame(columns, index=np.array(positions) / 100)
    table['km'] = table.index
    table.to_csv(out_csv)
    return table


with rio.open(dwm_file) as src_dwm, rio.open(jrc_file) as src_jrc:
    dwm_jrc = np.array([src_dwm.read(1) ,src_jrc.read(1)])

dwm_layer, jrc_layer = dwm_jrc
height = dwm_jrc.shape[1]
width = dwm_jrc.shape[2]

#every 10th row / column is sampled
steps = 10
lat_steps = range(0,height,steps)
lon_steps = range(0,width,steps)

#permanent water: class 4 in S2-DWM, class 3 in JRC-GSW-S
water_lat_sums_pw = _transect_table(
    {'Lat. S2-DWM (Pw)': _transect_area(dwm_layer,[4],1,steps),
     'Lat. JRC-GSW-S (Pw)': _transect_area(jrc_layer,[3],1,steps)},
    lat_steps,'./data/water_lat_lon/water_lat_sums_pw.csv')

water_lon_sums_pw = _transect_table(
    {'Lon. S2-DWM (Pw)': _transect_area(dwm_layer,[4],0,steps),
     'Lon. JRC-GSW-S (Pw)': _transect_area(jrc_layer,[3],0,steps)},
    lon_steps,'./data/water_lat_lon/water_lon_sums_pw.csv')

#seasonal water: class 3 (optionally plus wetland, class 2) in S2-DWM, class 2 in JRC-GSW-S
water_lat_sums_sw = _transect_table(
    {'Lat. S2-DWM (Sw)': _transect_area(dwm_layer,[3],1,steps),
     'Lat. S2-DWM (Sw + Wl)': _transect_area(dwm_layer,[2,3],1,steps),
     'Lat. JRC-GSW-S (Sw)': _transect_area(jrc_layer,[2],1,steps)},
    lat_steps,'./data/water_lat_lon/water_lat_sums_sw.csv')

water_lon_sums_sw = _transect_table(
    {'Lon. S2-DWM (Sw)': _transect_area(dwm_layer,[3],0,steps),
     'Lon. S2-DWM (Sw + Wl)': _transect_area(dwm_layer,[2,3],0,steps),
     'Lon. JRC-GSW-S (Sw)': _transect_area(jrc_layer,[2],0,steps)},
    lon_steps,'./data/water_lat_lon/water_lon_sums_sw.csv')


In [None]:
#visualize lat lon water area

def _series_only(table):
    """Return `table` without the helper 'km' column so it is not drawn as an extra line."""
    return table.drop(columns='km', errors='ignore')


def _rmse_text(table, ref_col, est_col):
    """Format the RMSE between two columns (selected by position) with two decimals."""
    rmse = np.sqrt(table.iloc[:,ref_col].sub(table.iloc[:,est_col]).pow(2).mean())
    return f"{rmse:.2f}"


sns.set(font_scale=1.8)
sns.set_style('darkgrid',rc={"xtick.bottom" : True, "ytick.left" : True,'axes.edgecolor': 'black'})
fig, axs = plt.subplots(2,2,figsize=(20,10),sharey=True)
axs=axs.ravel()

#one panel per table; each palette has exactly one colour per plotted series
panels = [
    (water_lat_sums_pw,['blue','orange']),
    (water_lon_sums_pw,['blue','orange']),
    (water_lat_sums_sw,['blue','green','orange']),
    (water_lon_sums_sw,['blue','green','orange']),
]
for ax,(table,palette) in zip(axs,panels):
    sns.lineplot(data=_series_only(table),dashes=False,ax=ax,palette=palette)

axs[2].set_xlabel('Distance from north to south (km)')
axs[3].set_xlabel('Distance from west to east (km)')
fig.text(0, 0.5, 'Surface water area (ha)', ha='center', va='center', rotation='vertical')

#no horizontal padding; tick labels are hidden on the top row
for i, ax in enumerate(axs):
    ax.margins(x=0)
    if i<2:
        ax.set_xticklabels([])

#RMSE of the JRC-GSW-S column against the S2-DWM column(s)
rmse_lat_pw = _rmse_text(water_lat_sums_pw,1,0)
axs[0].text(0.59,0.65, f"RMSE: {rmse_lat_pw}", ha="left", transform=axs[0].transAxes)

rmse_lon_pw = _rmse_text(water_lon_sums_pw,1,0)
axs[1].text(0.59,0.65, f"RMSE: {rmse_lon_pw}", ha="left", transform=axs[1].transAxes)

rmse_lat_sw = _rmse_text(water_lat_sums_sw,2,0)
rmse_lat_sw_wl = _rmse_text(water_lat_sums_sw,2,1)
axs[2].text(0.57,0.5, f"RMSE (Sw): {rmse_lat_sw}\nRMSE (Sw+Wl): {rmse_lat_sw_wl}", ha="left", transform=axs[2].transAxes)

rmse_lon_sw = _rmse_text(water_lon_sums_sw,2,0)
rmse_lon_sw_wl = _rmse_text(water_lon_sums_sw,2,1)
axs[3].text(0.57,0.5, f"RMSE (Sw): {rmse_lon_sw}\nRMSE (Sw+Wl): {rmse_lon_sw_wl}", ha="left", transform=axs[3].transAxes)

plt.tight_layout()


#### G. Computing MNWS boxplot and spectral signatures

In [None]:
%%time
#extract MNWS and spectral pixel values

#land-cover names keyed by sample code (1..8); strings are kept exactly as they are
#because they become CSV column names that other cells select by
long_names = ['Water','Settlement','Dark soil',' Bright soil','Vegetation','Agriculture','Cloud','Cloud shadow']
labels = dict(enumerate(long_names, start=1))

sample_files = glob('./data/samples/sample*.geojson')
#not used below; kept because notebook state is shared across cells
dates = [re.findall(r"(\d{8})", path)[0] for path in sample_files]
mosaics = glob(f'{parent_dir}/images/*.tif')
mosaics = [img for date in dates for img in mosaics if date in img]

mnws_files = glob(f'{parent_dir}/results/mnws_images/rws_mnw*.tif')

#Sentinel-2 band labels with central wavelengths (B6 is ~740 nm and B11 ~1610 nm;
#the earlier labels read 750 and 1910)
bands = ['B2','B3','B4','B5','B6','B7','B8','B8A','B11','B12']
nm = ['490','560','665','705','740','783','842','865','1610','2190']
band_names = [f'{a}\n{b} nm' for a,b in zip(bands,nm)]

all_band_data = []
all_mnws_data = []
for f in sample_files:
    date = re.findall(r"(\d{8})", f)[0]

    #sorting by code makes unique() ascending; shapes are rasterized in that order
    gdf = gpd.read_file(f).sort_values('code')
    geom_code = list(zip(gdf['geometry'].tolist(), gdf['code'].tolist()))
    codes = gdf['code'].unique()

    fmnws = f'{results_dir}/mnws_images/rws_mnws_{date}.tif'
    fmos = f'{parent_dir}/images/mosaic_{date}.tif'

    with rio.open(fmos) as src, rio.open(fmnws) as src_mnws:

        img = src.read()
        img = np.where(img<0,0,img)
        #keep the ten bands named in band_names (index 8 is skipped)
        img = img[[0,1,2,3,4,5,6,7,9,10]]
        img_mnws = src_mnws.read()
        sample_mask = rasterize(shapes=geom_code, out_shape=src.shape, transform=src.transform)

    #mnws values per class: one column per land cover, named from the code actually
    #present so a sample file that lacks a class cannot shift the column names
    mnws_data = pd.concat(
        {labels[code]: pd.Series(img_mnws[0][sample_mask==code]) for code in codes},
        axis=1)
    all_mnws_data.append(mnws_data)

    #per-class mean and std of every band, scaled by 1/10000
    band_data = []
    for code in codes:
        band_df = pd.DataFrame(img[:,sample_mask==code]).T.describe().loc[['mean','std']]/10000
        band_df.columns = band_names
        band_df['lc'] = labels[code]
        band_data.append(band_df)

    band_data = pd.concat(band_data)
    all_band_data.append(band_data)

#export data
mnws_df = pd.concat(all_mnws_data).reset_index(drop=True)
mnws_df.to_csv('./data/mnws_boxplot.csv')

band_df = pd.concat(all_band_data)
band_df.to_csv('./data/spectral_signature.csv')

In [None]:
#plot box
#read the exported MNWS samples of all dates; the first CSV column is the saved row
#index, not data. The plot uses this frame, not `mnws_data`, which only holds the
#last sample file processed by the extraction loop.
mnws_df = pd.read_csv('./data/mnws_boxplot.csv',index_col=0)
plt.figure(figsize=(10,5))
sns.set_style('darkgrid',rc={"xtick.bottom" : True, "ytick.left" : True,'axes.edgecolor': 'black'})
flierprops = dict(marker='o',markerfacecolor='red', markersize=5,markeredgecolor='white')
colors=['blue','orange','brown','yellow','green','purple','red','black']
sns.boxplot(data=mnws_df,flierprops=flierprops,whis=[5, 95],orient='h',palette=colors,linewidth=1)
plt.xlabel('MNWS',fontsize=16)
plt.xticks(range(0, int(mnws_df.max().max()), 5))
plt.show()


In [None]:
#plot spectral signature
#mean and std both come from the exported CSV, so the plot uses one consistent
#source instead of mixing the file with the in-memory `all_band_data`
signature = pd.read_csv('./data/spectral_signature.csv',index_col=0)
class_order = list(labels.values())
all_agg_mean = signature.loc['mean'].groupby('lc').mean().T[class_order]
all_agg_std = signature.loc['std'].groupby('lc').mean().T[class_order]

#mean +/- std envelope for the two classes that get a shaded band
shaded = ['Water','Cloud shadow']
lo_bound = all_agg_mean[shaded]-all_agg_std[shaded]
hi_bound = all_agg_mean[shaded]+all_agg_std[shaded]

sns.set(font_scale=2)
sns.set_style('darkgrid',rc={"xtick.bottom" : True, "ytick.left" : True,'axes.edgecolor': 'black'})
plt.figure(figsize=(15,10))
colors=['blue','orange','brown','yellow','green','purple','red','black']
lines = sns.lineplot(data=all_agg_mean,dashes=False,sort=False,palette=colors,legend=False)
#the first line (Water) is drawn dashed
lines.lines[0].set_linestyle("--")
plt.legend(all_agg_mean.columns)
plt.ylabel('Surface Reflectance')
#x positions are the band labels of the plotted frame, so bands and lines always align
x_bands = all_agg_mean.index
plt.fill_between(x_bands,lo_bound['Water'], hi_bound['Water'], alpha=.3)
plt.fill_between(x_bands,lo_bound['Cloud shadow'], hi_bound['Cloud shadow'], alpha=.3,color=colors[-1])
plt.tight_layout()