# DEM, Shapefile filtering
A notebook filtering a DEM based on a shapefile

In [None]:
import geopandas
import matplotlib.pyplot
import numpy
import rasterio
import rasterio.features
import rasterio.mask
import rioxarray
import scipy.ndimage
import shapely.geometry
import xarray

## Load in data
Note that the LiDAR (and DEMs) were downloaded from OpenTopography with EPSG:2193.
Horizontal Coordinates: NZGD2000 / New Zealand Transverse Mercator 2000 [EPSG: 2193]
Vertical Coordinates: NZVD2016 [EPSG: 7839]

In [None]:
# DEM GeoTIFF downloaded from OpenTopography. Opened through the documented
# top-level `rioxarray.open_rasterio` entry point; `masked=True` is kept so
# nodata cells come back as NaN, which the later numpy.isnan() cells rely on.
dem_tiff = rioxarray.open_rasterio(
    r'C:\Users\pearsonra\Documents\data\herbert_10res\dem\dem.tif',
    masked=True)
# Coastline shapefile exported from QGIS
ocean_shape = geopandas.read_file(
    r'C:\Users\pearsonra\Documents\data\herbert_10res\fake_coast.shp')

## View and correct projections
### Projection for DEM

In [None]:
# Display the coordinate reference system recorded on the DEM raster
dem_tiff.rio.crs

### Projection from Shape files
Set the projection based on the DEM, just in case they are different

In [None]:
# Reproject the shapefile geometries into the CRS used by the DEM
dem_crs = dem_tiff.rio.crs
ocean_shape = ocean_shape.to_crs(dem_crs)

## View DEM and select region to filter
Below are the DEM and the ocean shapefile (saved out of QGIS with a projection).

In [None]:
f, ax = matplotlib.pyplot.subplots(figsize=(10, 5))

# Greyscale DEM as the base layer
dem_tiff.plot(ax=ax, cmap="Greys_r")

# Coastline drawn on top of the raster as an unfilled teal outline
outline_style = dict(color='None', edgecolor='teal', linewidth=2, zorder=4)
ocean_shape.plot(ax=ax, **outline_style)

ax.set_title("LiDAR Herbert - with coast projection from QGIS")
matplotlib.pyplot.show()

The DEM zoomed in on the region to manually create a polygon here

In [None]:
f, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
dem_tiff.plot(cmap="Greys_r", ax=ax)

# Restrict the view to the coastal window of interest
# (limits are in the DEM's projected x/y coordinates)
ax.set_xlim([1.43222e6, 1.434e6])
ax.set_ylim([4.9857e6, 4.9873e6])
ax.set_title("LiDAR Herbert - zoomed in on coast")

matplotlib.pyplot.show()

## Manually create a polygon

In [None]:
# Rectangular ocean region in the DEM's projected coordinates; the bounds
# match the x/y limits used for the zoomed-in plot.
x1, x2 = 1.43222e6, 1.43400e6
y1, y2 = 4.9857e6, 4.9873e6
ocean_manual_shape = shapely.geometry.Polygon([
    (x1, y1),
    (x2, y1),
    (x2, y2),
    (x1, y2),
])
# Attach the DEM's CRS so the series is not left without projection metadata
# (the coordinates above are only meaningful in that CRS).
ocean_manual_gdf = geopandas.GeoSeries([ocean_manual_shape],
                                       crs=dem_tiff.rio.crs)

## Plot both together

In [None]:
f, ax = matplotlib.pyplot.subplots(figsize=(10, 5))

# DEM underneath, manual polygon outlined on top
dem_tiff.plot(ax=ax, cmap="Greys_r")
polygon_style = dict(color='None', edgecolor='teal', linewidth=2, zorder=4)
ocean_manual_gdf.plot(ax=ax, **polygon_style)

ax.set_title("LiDAR Herbert")
matplotlib.pyplot.show()

## Filtering
#### Create a simple mask from the shapefile

In [None]:
# Boolean raster on the DEM grid. invert=True is passed so that cells covered
# by the manual ocean polygon are True and all other cells are False.
# `rio.shape` gives (height, width) without hard-coding the dimension names.
ocean_mask = rasterio.features.geometry_mask(
    [ocean_manual_shape],
    out_shape=dem_tiff.rio.shape,
    transform=dem_tiff.rio.transform(),
    invert=True,
)

In [None]:
# Show the boolean ocean mask as an image with a colorbar
f, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
p = ax.imshow(ocean_mask)
f.colorbar(p, ax=ax)

#### Combine DEM array with ocean mask and set missing (NaN) ocean values to -20

In [None]:
# Work on a copy of the first band so the original DEM array is left untouched
combined = numpy.array(dem_tiff.data[0], copy=True)

In [None]:
# Cells that are both inside the ocean polygon and missing in the DEM
ocean_holes = numpy.logical_and(ocean_mask, numpy.isnan(combined))
combined[ocean_holes] = -20

In [None]:
# Show the DEM after the ocean cells have been filled
f, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
p = ax.imshow(combined)
f.colorbar(p, ax=ax)

#### Perform filtering
* Gaussian filter with 20 smoothing
  * Note - need to use two arrays and combine them to deal with NaNs - one holding the data with NaNs replaced by 0, and one weight array that is 1 for valid cells and 0 for NaNs.
  * _**Need to figure out how to calculate the sigma value.**_
* Currently filtering applied based on all valid data
  * Could look at restricting filtering in channels to only data within the channel

In [None]:
sigma = 0.5  # will need to calculate based on spatial spacing

# NaN-aware Gaussian smoothing: filter the data with NaNs zeroed out, filter a
# validity weight array (1 = valid, 0 = NaN) the same way, then divide the two.
is_missing = numpy.isnan(combined)
combined_no_nan = numpy.where(is_missing, 0, combined)
nan_mask = (~is_missing).astype(float)

smoothed_values = scipy.ndimage.gaussian_filter(combined_no_nan, sigma=sigma)
smoothed_weights = scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma)
smoothed = smoothed_values / smoothed_weights

##### _Set values back to NaN_
This is more complex than the case that the LiDAR extends all the way to the edge as it is hard to distinguish between internal holes and external missing data

In [None]:
# Reset smoothed cells to NaN only where the smoothed value is exactly 0 AND
# the source cell was missing. The parentheses are required: `&` binds more
# tightly than `==`, so the unparenthesised form compared `smoothed` against
# `0 & isnan(...)` (always 0) and ignored the NaN condition entirely.
smoothed[(smoothed == 0) & numpy.isnan(combined)] = numpy.nan

#### Use the smoothed array only where no valid data exists

In [None]:
# Only the cells that are still missing take their value from the smoothed array
still_missing = numpy.isnan(combined)
combined[still_missing] = smoothed[still_missing]

### Plot results 

In [None]:
# Side-by-side comparison: the original DEM band (left) and the DEM with its
# missing cells filled from the smoothed array (right).
f = matplotlib.pyplot.figure(figsize=(15, 5))

ax1 = f.add_subplot(121)  # left side
p1 = ax1.imshow(dem_tiff.data[0])
f.colorbar(p1, ax=ax1)
ax1.set_title("Original Open Topography DEM")

ax2 = f.add_subplot(122)  # right side
p2 = ax2.imshow(combined)
f.colorbar(p2, ax=ax2)
ax2.set_title("Open Topography DEM + smoothing in holes")

In [None]:
# Show artifacts - bump in channel.
# Both panels use the same pixel window and colour range so they can be
# compared directly.
# NOTE(review): imshow normally puts row 0 at the top; setting the y-limits as
# [250, 450] appears to flip this view vertically relative to the full-extent
# figures - confirm this is intended.
zoom_cols = [500, 880]
zoom_rows = [250, 450]

f = matplotlib.pyplot.figure(figsize=(15, 5))

ax1 = f.add_subplot(121)  # left side
p1 = ax1.imshow(dem_tiff.data[0], vmin=-5, vmax=15)
ax1.set_xlim(zoom_cols)
ax1.set_ylim(zoom_rows)

ax2 = f.add_subplot(122)  # right side
p2 = ax2.imshow(combined, vmin=-5, vmax=15)
ax2.set_xlim(zoom_cols)
ax2.set_ylim(zoom_rows)

f.colorbar(p1, ax=ax1)
f.colorbar(p2, ax=ax2)

ax1.set_title("Original Open Topography DEM")
ax2.set_title("Open Topography DEM + smoothing in holes")

#### Fill in any missing values with NN
Note there is an additional challenge here as LiDAR does not extend to the boundary.

This is a common challenge faced by Cyprien. He has explored extrapolating to a very high value around the edge, and also smoothing onto a background DEM (say NZ wide 15m dem)

In [None]:
# NOTE(review): the nearest-neighbour fill itself is not implemented in this
# cell. The original snippet referenced `sf` and `ndvi`, which are not defined
# anywhere in the visible notebook; it is adapted here to this notebook's
# variables on the assumption that `sf` stood for the coastline shapefile and
# `ndvi` for the raster - confirm.
# Builds a boolean raster on the DEM grid that is True inside the shapefile
# geometries (invert=True).
rasterio.features.geometry_mask(ocean_shape.geometry,
                                out_shape=(len(dem_tiff.y), len(dem_tiff.x)),
                                transform=dem_tiff.rio.transform(),
                                invert=True)