# DEM - hole filling and ocean masking
A notebook that filters a DEM with a Gaussian filter to fill holes, and uses a shapefile to define the ocean. Plots are used to demonstrate several challenges with the filtering.

In [None]:
import rioxarray
import xarray
import geopandas
import shapely.geometry
import rasterio
import rasterio.mask
import numpy
import matplotlib.pyplot
import scipy.ndimage
import scipy.interpolate

# Setup
### Parameters
A sigma of 0.9 is the minimum value that fills all holes.

In [None]:
# sigma is not set here - each attempt below defines its own value locally.
# depth is the value written into ocean cells that have no DEM value.
depth = -10

### Load in data
Note that the LiDAR (and DEMs) were downloaded from OpenTopography with EPSG:2193.
Horizontal Coordinates: NZGD2000 / New Zealand Transverse Mercator 2000 [EPSG: 2193]
Vertical Coordinates: NZVD2016 [EPSG: 7839]

In [None]:
# DEM GeoTIFF from Open Topography.
# Opened through the documented top-level entry point rioxarray.open_rasterio
# rather than by reaching into the rioxarray.rioxarray submodule.
# masked=True is requested so missing cells can be found with numpy.isnan
# in the cells below.
dem_tiff = rioxarray.open_rasterio(r'C:\Users\pearsonra\Documents\data\herbert_10res\dem\dem.tif', masked=True)
# Coastline shapefile saved out of QGIS.
ocean_shape = geopandas.read_file(r'C:\Users\pearsonra\Documents\data\herbert_10res\fake_coast.shp')

Ensure the DEM is appropriately named

In [None]:
# Give the DEM DataArray the name 'z'
dem_tiff = dem_tiff.rename('z')

### View and correct projections
#### Projection for DEM

In [None]:
# Display the coordinate reference system attached to the DEM
dem_tiff.rio.crs

#### Projection from Shape files
Set the projection based on the DEM just in case they are different

In [None]:
# Reproject the ocean shapefile into the CRS of the DEM
ocean_shape = ocean_shape.to_crs(dem_tiff.rio.crs)

### View DEM and selected region to filter
Below the dem and the ocean shape file - saved out of QGIS with a projection.

In [None]:
fig, axis = matplotlib.pyplot.subplots(figsize=(10, 5))

# Greyscale DEM as the background, with the QGIS coast outlined on top
dem_tiff.plot(ax=axis, cmap="Greys_r")
ocean_shape.plot(ax=axis, color='None', edgecolor='teal', linewidth=2, zorder=4)

axis.set_title("LiDAR Herbert - with coast projection from QGIS")
matplotlib.pyplot.show()

The DEM zoomed in on the region to manually create a polygon here

In [None]:
fig, axis = matplotlib.pyplot.subplots(figsize=(10, 5))
dem_tiff.plot(ax=axis, cmap="Greys_r")
axis.set_title("LiDAR Herbert - zoomed in on coast")

# Restrict the view to the coastal region where the polygon will be drawn
axis.set_xlim([1.4320e6, 1.4330e6])
axis.set_ylim([4.9857e6, 4.9873e6])

matplotlib.pyplot.show()

### Manually create a polygon

In [None]:
# Corners of the hand-picked rectangle, in the same coordinates as the DEM plot
x_min, x_max = 1.43222e6, 1.43400e6
y_min, y_max = 4.9857e6, 4.9873e6

rectangle_corners = [
    (x_min, y_min),
    (x_max, y_min),
    (x_max, y_max),
    (x_min, y_max),
]
ocean_manual_shape = shapely.geometry.Polygon(rectangle_corners)
ocean_manual_gdf = geopandas.GeoSeries([ocean_manual_shape])

#### Plot both together

In [None]:
fig, axis = matplotlib.pyplot.subplots(figsize=(10, 5))

# Greyscale DEM as the background, with the manual ocean rectangle outlined on top
dem_tiff.plot(ax=axis, cmap="Greys_r")
ocean_manual_gdf.plot(ax=axis, color='None', edgecolor='teal', linewidth=2, zorder=4)

axis.set_title("LiDAR Herbert")
matplotlib.pyplot.show()

# Attempt 1 - Gaussian filter combined raster
Create a combined raster with the original DEM and an ocean depth mapped over the ocean shapefile but excluding DEM values.
Apply a Gaussian filter across the combined DEM.
#### Sigma used for filtering

In [None]:
# Gaussian filter sigma for Attempt 1.
# TODO: calculate from the spatial resolution instead of hard-coding.
sigma = 0.5

#### Create a simple mask from the shapefile

In [None]:
# Import the submodule explicitly; it was previously only reachable because
# rasterio.mask happens to import rasterio.features internally.
import rasterio.features

# Rasterise the manual polygon onto the DEM grid.
# invert=True makes the mask True inside the polygon (the ocean region).
ocean_mask = rasterio.features.geometry_mask(
    [ocean_manual_shape],
    out_shape=(len(dem_tiff.y), len(dem_tiff.x)),
    transform=dem_tiff.rio.transform(),
    invert=True,
)

In [None]:
# Quick look at the rasterised ocean mask
fig, axis = matplotlib.pyplot.subplots(figsize=(5, 3))
mask_image = axis.imshow(ocean_mask)
fig.colorbar(mask_image, ax=axis)

#### Combine DEM array with ocean mask and set ocean values to -10

In [None]:
# Work on a copy of the first band so the original DEM is left untouched
combined = numpy.copy(dem_tiff.data[0])

# Ocean cells without a DEM value get the fake bathymetry depth
ocean_without_dem = ocean_mask & numpy.isnan(combined)
combined[ocean_without_dem] = depth

In [None]:
# Quick look at the DEM with the ocean depth patched in
fig, axis = matplotlib.pyplot.subplots(figsize=(10, 5))
combined_image = axis.imshow(combined)
fig.colorbar(combined_image, ax=axis)

#### Perform filtering
* Gaussian filter with 20 smoothing
  * Note - need to use two arrays and combine them to deal with NaNs - the data with NaNs set to 0, and a weight array that is 0 where the data is NaN and 1 elsewhere.
  * _**Need to figure out how to calculate the sigma value.**_
* Currently filtering applied based on all valid data
  * Could look at restricting filtering in channels to only data within the channel

In [None]:
# NaN-aware Gaussian filter, following
# https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python
# Filter the data (NaN replaced by 0) and a 0/1 validity weight separately,
# then divide so that NaN cells do not drag the average towards zero.
is_valid = ~numpy.isnan(combined)
combined_no_nan = numpy.where(is_valid, combined, 0)
nan_mask = is_valid.astype(float)

filtered_values = scipy.ndimage.gaussian_filter(combined_no_nan, sigma=sigma)
filtered_weights = scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)
smoothed = filtered_values / filtered_weights

#### Set values back to NaN
This is more complex than the case that the LiDAR extends all the way to the edge as it is hard to distinguish between internal holes and external missing data

In [None]:
# Reset cells that were NaN in the input and came out of the filter as exactly
# zero back to NaN.
# The parentheses are required: `&` binds more tightly than `==`, so without
# them the expression is `smoothed == (0 & isnan(...))`, which drops the NaN
# condition and blanks every zero-valued cell.
smoothed[(smoothed == 0) & numpy.isnan(combined)] = numpy.nan

#### Use the smoothed array only where no valid data exists

In [None]:
# Keep the original values; take the smoothed value only where data is missing
missing = numpy.isnan(combined)
combined[missing] = smoothed[missing]

### Plot results - Bad as off cliff
As the original DEM values are used, these will overwrite the smoothing on land, so the surface will step abruptly from land to a value Gaussian-smoothed with the bathymetry depth (-10).

In [None]:
fig = matplotlib.pyplot.figure(figsize=(15, 10))
grid = fig.add_gridspec(2, 2)

# Top row: original DEM, full extent and zoomed with a clipped colour range
ax_orig = fig.add_subplot(grid[0, 0])
fig.colorbar(ax_orig.imshow(dem_tiff.data[0]), ax=ax_orig)

ax_orig_zoom = fig.add_subplot(grid[0, 1])
fig.colorbar(ax_orig_zoom.imshow(dem_tiff.data[0], vmin=depth, vmax=10), ax=ax_orig_zoom)
ax_orig_zoom.set_xlim([500, 880])
ax_orig_zoom.set_ylim([200, 500])

ax_orig.set_title("Original Open Topography DEM")
ax_orig_zoom.set_title("Original Zoomed")

# Bottom row: filled DEM with fake bathymetry, same two views
ax_filled = fig.add_subplot(grid[1, 0])
fig.colorbar(ax_filled.imshow(combined), ax=ax_filled)

ax_filled_zoom = fig.add_subplot(grid[1, 1])
fig.colorbar(ax_filled_zoom.imshow(combined, vmin=depth, vmax=10), ax=ax_filled_zoom)
ax_filled_zoom.set_xlim([500, 880])
ax_filled_zoom.set_ylim([200, 500])

ax_filled.set_title("Filled + Fake Bathymetry DEM")
ax_filled_zoom.set_title("Filled + Fake Bathymetry Zoomed");

### _The question of fill in any missing values with Nearest Neighbour_
Note there is an additional challenge here as LiDAR does not extend to the boundary - so it will be hard to use NN on only the internal holes and not the external holes.

This is a common challenge faced by Cyprien. He has explored extrapolating to a very high value around the edge, and also smoothing onto a background DEM (say NZ wide 15m dem)

_**I will explore some of those options in later notebooks - for now I will just use a larger sigma.**_

# Attempt 2 - Fill holes and then after patch in the ocean 
Filter the original DEM initially to fill in any holes, and then use this filled DEM as a base for patching in the ocean. Tried three different approaches for ocean patching.
1. Patch the ocean with a Gaussian filter - poor results = bleed across and then step abruptly from the original DEM
2. Include a gap between the patch and the original DEM and apply a Gaussian filter across the gap - still a very sharp transition
3. Include a gap between the patch and the original DEM and apply a linear interpolant - better but will act as a dam

#### sigma - set to fill all holes

In [None]:
# Gaussian filter sigma for Attempt 2.
# TODO: calculate from the spatial spacing instead of hard-coding.
sigma = 0.9

## 1. Different Gaussian smoothing - Ocean patch to DEM boundary
1. Fill internal holes - with Gaussian smoothing
2. Apply higher smoothing with ocean mask - repeat several times to avoid water-fall

#### Hole filling

In [None]:
# NaN-aware Gaussian filter of the original DEM: filter the data (NaN -> 0) and
# a 0/1 validity weight separately, then divide.
dem_no_nan = numpy.copy(dem_tiff.data[0])
dem_no_nan[numpy.isnan(dem_no_nan)] = 0
nan_mask = numpy.ones(dem_tiff.data[0].shape)
nan_mask[numpy.isnan(dem_tiff.data[0])] = 0

smoothed = scipy.ndimage.gaussian_filter(dem_no_nan, sigma=sigma) / scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)

# Reset cells that were NaN in the DEM and filtered to exactly zero back to NaN.
# Parentheses are required: `&` binds more tightly than `==`, so the
# unparenthesised form silently ignored the NaN condition.
smoothed[(smoothed == 0) & numpy.isnan(dem_tiff.data[0])] = numpy.nan

# Keep the original DEM values; use smoothed values only in the holes
dem_no_hole = numpy.copy(dem_tiff.data[0])
dem_no_hole[numpy.isnan(dem_tiff.data[0])] = smoothed[numpy.isnan(dem_tiff.data[0])]

#### Plot hole filling results - Bad - note dilation out 
The process effectively causes dilation around the coast. This could perhaps be overcome by doing a straight replacement where the ocean mask is 1.

In [None]:
fig = matplotlib.pyplot.figure(figsize=(15, 5))

# Left panel: original DEM
ax_left = fig.add_subplot(1, 2, 1)
fig.colorbar(ax_left.imshow(dem_tiff.data[0]), ax=ax_left)
ax_left.set_xlim([500, 880])
ax_left.set_ylim([250, 450])

# Right panel: DEM after hole filling
ax_right = fig.add_subplot(1, 2, 2)
fig.colorbar(ax_right.imshow(dem_no_hole), ax=ax_right)
ax_right.set_xlim([500, 880])
ax_right.set_ylim([250, 450])

ax_left.set_title("Original Open Topography DEM")
ax_right.set_title("Open Topgraphy DEM + smoothing in holes")

#### Remove dilation - Replace values where the ocean mask is 1

In [None]:
# Inside the ocean polygon, put back the original DEM values (including NaN)
# so the filter no longer extends the DEM into the ocean there
ocean_cells = ocean_mask == 1
dem_no_hole[ocean_cells] = dem_tiff.data[0][ocean_cells]

fig = matplotlib.pyplot.figure(figsize=(15, 5))

# Left panel: original DEM
ax_left = fig.add_subplot(1, 2, 1)
fig.colorbar(ax_left.imshow(dem_tiff.data[0]), ax=ax_left)
ax_left.set_xlim([500, 880])
ax_left.set_ylim([250, 450])

# Right panel: hole-filled DEM after the replacement above
ax_right = fig.add_subplot(1, 2, 2)
fig.colorbar(ax_right.imshow(dem_no_hole), ax=ax_right)
ax_right.set_xlim([500, 880])
ax_right.set_ylim([250, 450])

ax_left.set_title("Original Open Topography DEM")
ax_right.set_title("Open Topgraphy DEM + smoothing in holes")

### Apply the ocean patch
Applying the patch directly will cause a waterfall once the original values are copied back onto the DEM.

In [None]:
sigma = .9 # will need to calculate based on spatial spacing

# Patch the ocean depth into ocean cells that have no value in the hole-filled DEM
combined = numpy.copy(dem_no_hole)
combined[ocean_mask & numpy.isnan(combined)] = depth

# NaN-aware Gaussian filter: filter the data (NaN -> 0) and a 0/1 validity
# weight separately, then divide.
combined_no_nan = numpy.copy(combined)
combined_no_nan[numpy.isnan(combined_no_nan)] = 0
nan_mask = numpy.ones(combined.shape)
nan_mask[numpy.isnan(combined)] = 0

smoothed = scipy.ndimage.gaussian_filter(combined_no_nan, sigma=sigma) / scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)

# Reset cells that were NaN in the hole-filled DEM and filtered to exactly zero
# back to NaN. Parentheses are required: `&` binds more tightly than `==`, so
# the unparenthesised form silently ignored the NaN condition.
smoothed[(smoothed == 0) & numpy.isnan(dem_no_hole)] = numpy.nan

# Keep the hole-filled DEM values; use smoothed values only where it is NaN
dem_combined = numpy.copy(dem_no_hole)
dem_combined[numpy.isnan(dem_no_hole)] = smoothed[numpy.isnan(dem_no_hole)]

### Plot - note bad result
The process still effectively causes a very steep drop-off along the coast - in fact the only difference from Attempt 1 is that different sigmas can be used for the two filtering steps.

In [None]:
fig = matplotlib.pyplot.figure(figsize=(15, 10))
grid = fig.add_gridspec(2, 4)

# Top row: original DEM, wide full-extent panel plus a narrow zoomed panel
ax_orig = fig.add_subplot(grid[0, 0:3])
fig.colorbar(ax_orig.imshow(dem_tiff.data[0]), ax=ax_orig)

ax_orig_zoom = fig.add_subplot(grid[0, 3])
fig.colorbar(ax_orig_zoom.imshow(dem_tiff.data[0], vmin=-10, vmax=10), ax=ax_orig_zoom)
ax_orig_zoom.set_xlim([840, 880])
ax_orig_zoom.set_ylim([240, 380])

ax_orig.set_title("Original Open Topography DEM")
ax_orig_zoom.set_title("Original Zoomed")

# Bottom row: DEM with holes filled and the ocean patched in, same two views
ax_patched = fig.add_subplot(grid[1, 0:3])
fig.colorbar(ax_patched.imshow(dem_combined), ax=ax_patched)

ax_patched_zoom = fig.add_subplot(grid[1, 3])
fig.colorbar(ax_patched_zoom.imshow(dem_combined, vmin=-10, vmax=10), ax=ax_patched_zoom)
ax_patched_zoom.set_xlim([840, 880])
ax_patched_zoom.set_ylim([240, 380])

ax_patched.set_title("Filled + Fake Bathymetry DEM")
ax_patched_zoom.set_title("Filled + Bathymetry");

## 2. Different Gaussian smoothing - Ocean patch with gap to DEM boundary
Repeat but with nan gap between DEM and ocean fill
Use the dilation from the hole filling to provide a buffer

#### Hole filling

In [None]:
# NaN-aware Gaussian filter of the original DEM: filter the data (NaN -> 0) and
# a 0/1 validity weight separately, then divide.
dem_no_nan = numpy.copy(dem_tiff.data[0])
dem_no_nan[numpy.isnan(dem_no_nan)] = 0
nan_mask = numpy.ones(dem_tiff.data[0].shape)
nan_mask[numpy.isnan(dem_tiff.data[0])] = 0

smoothed = scipy.ndimage.gaussian_filter(dem_no_nan, sigma=sigma) / scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)

# Reset cells that were NaN in the DEM and filtered to exactly zero back to NaN.
# Parentheses are required: `&` binds more tightly than `==`, so the
# unparenthesised form silently ignored the NaN condition.
smoothed[(smoothed == 0) & numpy.isnan(dem_tiff.data[0])] = numpy.nan

# Hole-filled DEM that still includes the dilation around the data edge;
# it is kept so the dilated ring can be used as a buffer later.
dem_no_hole_dialation = numpy.copy(dem_tiff.data[0])
dem_no_hole_dialation[numpy.isnan(dem_tiff.data[0])] = smoothed[numpy.isnan(dem_tiff.data[0])]

#####  dilation removal + buffer combined

In [None]:
# Remove the dilation inside the ocean polygon by restoring the original DEM
# values (including NaN) there
dem_no_hole = numpy.copy(dem_no_hole_dialation)
dem_no_hole[ocean_mask==1] = dem_tiff.data[0][ocean_mask==1]

# Patch in the ocean depth only where even the dilated DEM has no value, so the
# dilated ring is left as a NaN buffer between the DEM and the ocean patch.
# Uses the shared `depth` parameter rather than a hard-coded -10, matching the
# other ocean-patching cells.
combined = numpy.copy(dem_no_hole)
combined[ocean_mask & numpy.isnan(dem_no_hole_dialation)] = depth

##### Plot - show buffer

In [None]:
f = matplotlib.pyplot.figure(figsize=(15, 5))

# Left panel: hole-filled DEM with the dilation removed inside the ocean polygon
ax1 = f.add_subplot(121)  # left side
p1 = ax1.imshow(dem_no_hole, vmin = -10, vmax = 10)
f.colorbar(p1, ax=ax1)
matplotlib.pyplot.xlim([500, 880])
matplotlib.pyplot.ylim([250, 450])

# Right panel: the same DEM with the ocean patch added, separated by the NaN buffer
ax2 = f.add_subplot(122)  # right side
p2 = ax2.imshow(combined, vmin = -10, vmax = 10)
f.colorbar(p2, ax=ax2)
matplotlib.pyplot.xlim([500, 880])
matplotlib.pyplot.ylim([250, 450])

# Titles now describe what is actually plotted; the previous titles were copied
# from the hole-filling plot and labelled the left panel as the original DEM.
ax1.set_title("Hole-filled DEM (dilation removed)")
ax2.set_title("Hole-filled DEM + ocean patch with buffer")

##### Apply filter

In [None]:
sigma = 0.9

# NaN-aware Gaussian filter of the DEM + ocean patch: filter the data
# (NaN -> 0) and a 0/1 validity weight separately, then divide.
combined_no_nan = numpy.copy(combined)
combined_no_nan[numpy.isnan(combined_no_nan)] = 0
nan_mask = numpy.ones(combined.shape)
nan_mask[numpy.isnan(combined)] = 0

smoothed = scipy.ndimage.gaussian_filter(combined_no_nan, sigma=sigma) / scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)

# Reset cells that were NaN in the hole-filled DEM and filtered to exactly zero
# back to NaN. Parentheses are required: `&` binds more tightly than `==`, so
# the unparenthesised form blanked every zero-valued cell, including valid
# ones - and `smoothed` is plotted directly below.
smoothed[(smoothed == 0) & numpy.isnan(dem_no_hole)] = numpy.nan

# Keep the hole-filled DEM values; use smoothed values only where it is NaN
dem_combined = numpy.copy(dem_no_hole)
dem_combined[numpy.isnan(dem_no_hole)] = smoothed[numpy.isnan(dem_no_hole)]

## Plot results - bad - hard transition in the middle of the buffer
The transition is very hard from mainly land to mainly ocean

In [None]:
fig = matplotlib.pyplot.figure(figsize=(15, 10))
grid = fig.add_gridspec(2, 4)

# Top row: original DEM, wide full-extent panel plus a narrow zoomed panel
ax_orig = fig.add_subplot(grid[0, 0:3])
fig.colorbar(ax_orig.imshow(dem_tiff.data[0]), ax=ax_orig)

ax_orig_zoom = fig.add_subplot(grid[0, 3])
fig.colorbar(ax_orig_zoom.imshow(dem_tiff.data[0], vmin=-10, vmax=10), ax=ax_orig_zoom)
ax_orig_zoom.set_xlim([840, 880])
ax_orig_zoom.set_ylim([240, 380])

ax_orig.set_title("Original Open Topography DEM")
ax_orig_zoom.set_title("Original Zoomed")

# Bottom row: the Gaussian-filtered array, same two views
ax_smooth = fig.add_subplot(grid[1, 0:3])
fig.colorbar(ax_smooth.imshow(smoothed), ax=ax_smooth)

ax_smooth_zoom = fig.add_subplot(grid[1, 3])
fig.colorbar(ax_smooth_zoom.imshow(smoothed, vmin=-10, vmax=10), ax=ax_smooth_zoom)
ax_smooth_zoom.set_xlim([840, 880])
ax_smooth_zoom.set_ylim([240, 380])

ax_smooth.set_title("Smoothed and filled DEM")
ax_smooth_zoom.set_title("Smoothed zoomed");

## 3. Linear interpolation - Ocean patch with gap to DEM boundary
Still Gaussian filtering for hole filling, but then use linear interpolation to bring in the fake bathymetry.
Using Scipy interpolate

In [None]:
# Mask every invalid (NaN) cell of the DEM + ocean patch
masked_combined = numpy.ma.masked_invalid(numpy.copy(combined))
valid_cells = ~masked_combined.mask

# Pixel-index grids covering the whole raster (columns along x, rows along y)
n_rows, n_cols = masked_combined.shape
x = numpy.arange(0, n_cols)
y = numpy.arange(0, n_rows)
xx, yy = numpy.meshgrid(x, y)

# Locations and values of the valid cells only
x1 = xx[valid_cells]
y1 = yy[valid_cells]
temp_array = masked_combined[valid_cells]

# Interpolate from the valid cells onto every cell of the grid.
# method can be 'nearest', 'linear' or 'cubic'.
interpolate_array = scipy.interpolate.griddata(
    (x1, y1), temp_array.ravel(), (xx, yy), method='linear')

Mask the outside to NaN (i.e. everywhere outside both the dilated DEM and the ocean mask)

In [None]:
# Cells that are neither covered by the dilated DEM nor inside the ocean
# polygon are outside the region of interest: blank them out again
outside_region = numpy.isnan(dem_no_hole_dialation) & numpy.logical_not(ocean_mask)
dem_smoothed = numpy.copy(interpolate_array)
dem_smoothed[outside_region] = numpy.nan

### Plot - better but still bad - smooth transition in buffer
We end up with some areas in the buffer being dominated by the land DEMs forming islands. 

In [None]:
fig = matplotlib.pyplot.figure(figsize=(15, 10))
grid = fig.add_gridspec(2, 4)

# Top row: original DEM, wide full-extent panel plus a narrow zoomed panel
ax_orig = fig.add_subplot(grid[0, 0:3])
fig.colorbar(ax_orig.imshow(dem_tiff.data[0]), ax=ax_orig)

ax_orig_zoom = fig.add_subplot(grid[0, 3])
fig.colorbar(ax_orig_zoom.imshow(dem_tiff.data[0], vmin=-10, vmax=10), ax=ax_orig_zoom)
ax_orig_zoom.set_xlim([840, 880])
ax_orig_zoom.set_ylim([240, 380])

ax_orig.set_title("Original Open Topography DEM")
ax_orig_zoom.set_title("Original Zoomed")

# Bottom row: linearly interpolated DEM, same two views
ax_smooth = fig.add_subplot(grid[1, 0:3])
fig.colorbar(ax_smooth.imshow(dem_smoothed), ax=ax_smooth)

ax_smooth_zoom = fig.add_subplot(grid[1, 3])
fig.colorbar(ax_smooth_zoom.imshow(dem_smoothed, vmin=-10, vmax=10), ax=ax_smooth_zoom)
ax_smooth_zoom.set_xlim([840, 880])
ax_smooth_zoom.set_ylim([240, 380])

ax_smooth.set_title("Smoothed and filled DEM")
ax_smooth_zoom.set_title("Smoothed zoomed");

# Save out filled and bathymetry faked DEM
Convert to an xarray.dataset, and add wanted global attributes

In [None]:
# Wrap the DEM in a dataset and record the dataset-level attributes
dem_filtered = dem_tiff.to_dataset()
dem_filtered.attrs.update({
    'long_name': 'Elevation after smoothing a patching off to sea',
    'units': 'm',
})

Drop extra dimensions (i.e. 'band' and 'spatial_ref')

In [None]:
# Remove the 'band' and 'spatial_ref' variables, then the 'band' dimension
dem_filtered = (
    dem_filtered
    .drop_vars(["band", "spatial_ref"])
    .drop_dims(["band"])
)

Add data to the netCDF array. Will also add the original - but may not want it when sending to GPU or working with big datasets

In [None]:
# Start again from the DEM and strip the 'band' / 'spatial_ref' extras
dem_filtered = (
    dem_tiff.to_dataset()
    .drop_vars(["band", "spatial_ref"])
    .drop_dims(["band"])
)

# Dataset-level attributes
dem_filtered.attrs.update({
    'long_name': 'Elevation after smoothing a patching off to sea',
    'units': 'm',
})

# Store the interpolated DEM as 'z' and keep the untouched first band alongside it
dem_filtered = dem_filtered.assign(
    z=(('y', 'x'), dem_smoothed),
    original_z=(('y', 'x'), dem_tiff.data[0]),
)

Flip the y-axis direction to that expected by BG-FLOOD (i.e. increasing along each direction)

In [None]:
# The goal is coordinates that increase along each axis. Sort both axes
# instead of unconditionally reversing y, so the result is correct whichever
# way round the input raster was stored (a descending-y raster is flipped
# exactly as before).
dem_filtered = dem_filtered.sortby(["x", "y"])

Save results

In [None]:
# Write the dataset (z and original_z) to a netCDF file
dem_filtered.to_netcdf(r'C:\Users\pearsonra\Documents\data\herbert_10res\dem\dem_filled_and_fake_bathymetry.nc')