## Purpose:

Stack the rasters for each grid and calculate the standard deviation for each pixel.

In [None]:
!pip install pandarallel --user

In [1]:
import matplotlib.pyplot as plt
# from pandarallel import pandarallel
import rasterio as rio
import pandas as pd
import numpy as np
import glob
import os

In [2]:
# Raster index table. stack_rasters() looks rows up by raster name and reads
# their 'dz' value, so this must be loaded before that function is called.
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
index = pd.read_pickle('../../data/rasters.pkl')
print('Loaded', len(index), 'rasters')

Loaded 378 rasters


In [3]:
# Grid table. The functions in this notebook use each row's .name as the folder
# name under ../../data/grids/ and index the row's 'rasters' entry for a raster
# file-name stem.
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
grids = pd.read_pickle('../../data/grid_data.pkl')
print('Loaded', len(grids), 'grids')

Loaded 1263 grids


#### Function to stack all of a grid's rasters.

Creates a numpy array of shape: (num_rasters, 501, 501)

Sorted temporally so that **stack[-1,:,:]** is the most recent image.

In [48]:
def stack_rasters(grid, max_nodata=None, expected_shape=(501, 501)):
    '''
    Stacks the grid's rasters into one numpy array.

    Every ``*v3.0_dem.tif`` file in ``../../data/grids/<grid.name>/`` is read,
    shifted by the raster's ``dz`` value from the module-level ``index`` table,
    has its masked (no-data) pixels replaced by NaN, and is stacked in the
    order given by the third underscore-separated field of the file name
    (the acquisition date).

    DIMENSIONS:
    0: time (last entry is the most recent raster)
    1: row
    2: column

    PARAMETERS:
    grid:           row of the grid table; only ``grid.name`` is used here.
    max_nodata:     rasters whose no-data percentage is greater than or equal
                    to this value are left out. None (or 0) disables the filter.
    expected_shape: (rows, cols) every raster must have.

    RETURNS:
    float array of shape (num_rasters, rows, cols), or None if a raster does
    not have ``expected_shape`` (the offending path is printed).

    RAISES:
    ValueError if no raster is left to stack.
    '''
    rasters = glob.glob('../../data/grids/' + str(grid.name) + '/*v3.0_dem.tif')  # grab all rasters
    # Sort on the file name only, so underscores in the folder part of the
    # path cannot shift which field is used as the date.
    rasters.sort(key=lambda path: os.path.basename(path).split('_')[2])

    arrays = []
    for raster in rasters:
        # Open the file once and make sure the handle is closed again.
        with rio.open(raster) as src:
            dem = src.read(1)
            valid = src.read_masks(1) != 0  # mask value 0 marks no-data pixels
        if dem.shape != tuple(expected_shape):
            print(raster)
            return None

        # Percentage of no-data pixels (100 means no data at all). Computed
        # from the boolean mask so fully valid / fully empty rasters work too.
        nodata_val = 100.0 * (1.0 - valid.mean())
        if max_nodata and nodata_val >= max_nodata:
            continue

        # Apply Laser Altimetry Correction; the index key is the file name
        # without its trailing '_dem.tif' part.
        raster_name = os.path.basename(raster).rsplit('_', 1)[0]
        dz = index.loc[raster_name]['dz']
        arrays.append(np.where(valid, dem.astype(float) + dz, np.nan))  # no data becomes nan

    if not arrays:
        raise ValueError('No rasters to stack for grid ' + str(grid.name))
    return np.stack(arrays, axis=0)

#### Function to calculate the standard deviation raster of a grid

The pixel-level standard deviation is used to identify bodies of water.

In [54]:
def std_raster(grid):
    '''
    Writes the per-pixel standard deviation of a grid's raster stack to
    ``../../data/standard_deviation_with_corner_check/<grid.name>.tif``.

    Nothing is done if that file already exists. Rasters with 90% or more
    no-data, and rasters with more than one NaN corner, are left out of the
    stack. NaN values are ignored by the standard deviation; a pixel with no
    valid value in any raster is NaN in the output.

    PARAMETERS:
    grid: row of the grid table; ``grid.name`` and ``grid['rasters'][0]`` are used.

    RAISES:
    ValueError if there is no usable raster to compute the result from.
    '''
    out_dir = '../../data/standard_deviation_with_corner_check/'
    outfile = out_dir + str(grid.name) + '.tif'
    if os.path.exists(outfile):
        return  # already computed on an earlier run

    stack = stack_rasters(grid, max_nodata=90)  # Stack the rasters with a nodata threshold of 90%
    if stack is None:
        # stack_rasters gives up (after printing the path) on an unexpected raster shape
        raise ValueError('Could not stack rasters for grid ' + str(grid.name))

    # Check the corners of each raster, threshold is 1
    kept = [raster for raster in stack if not nan_corners(raster, max_nan_corners=1)]
    if not kept:
        raise ValueError('No raster passed the corner check for grid ' + str(grid.name))

    # Vectorised over the time axis instead of one Python call per pixel
    std = np.nanstd(np.stack(kept), axis=0)

    # Take the georeferencing from the grid's first raster; close the file afterwards
    template = '../../data/grids/' + str(grid.name) + '/' + grid['rasters'][0] + '_dem.tif'
    with rio.open(template) as src:
        metadata = src.meta.copy()
    metadata.update({'dtype': 'float64', 'count': 1})  # a single float64 band is written

    # Write raster to file
    os.makedirs(out_dir, exist_ok=True)
    with rio.open(outfile, 'w', **metadata) as dst:
        dst.write(std, 1)
    return

#### Function to check the corners of each raster for nan values

In [49]:
def nan_corners(raster, max_nan_corners=0):
    '''
    Tells whether too many of a raster's four corner pixels are NaN.

    PARAMETERS:
    raster:          2-D numpy array.
    max_nan_corners: highest number of NaN corners that is still accepted.

    RETURNS:
    True if more than ``max_nan_corners`` corner pixels are NaN, else False.
    '''
    # Index -1 is the last row/column, so these are the true corner pixels
    # (top-left, top-right, bottom-left, bottom-right).
    corners = raster[[0, 0, -1, -1], [0, -1, 0, -1]]
    count = int(np.isnan(corners).sum())

    return count > max_nan_corners

In [42]:
# Try the stacking on the first grid with a no-data threshold of 90%.
stack = stack_rasters(grids.iloc[0], 90)


In [43]:
# Expected form: (number of rasters kept, 501, 501).
stack.shape

(11, 501, 501)

In [57]:
# Write the standard-deviation raster for the first grid (does nothing if the output file already exists).
std_raster(grids.iloc[0])

starting std
finished std
