In [1]:
"""
This script calculates the agreement between our Sentinel-based map and three reference datasets
Returns precision, recall and F1 for 1x1 and 3x3 windows for each spatial block

maxwell.cook@colorado.edu
"""

# Packages
import os
import sys
import threading
import time
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray as rxr

# Globals

# Coordinate reference system the spatial block grid is reprojected into
proj = 'EPSG:5070'

# Regions of interest; the file lists below hold one entry per ROI
rois = ['srme', 'wrnf']

# Target grid (resampled 10-meter map using maximum resampling)
tests = [
    's2aspen_prob_10m_binOpt_srme.tif',
    's2aspen_prob_10m_binOpt_wrnf.tif',
]

# Reference grids (binary, matched)
refs = [
    'lc16_evt_200_bin_srme_10m.tif',
    'lc16_evt_200_bin_wrnf_10m.tif',
    'usfs_treemap16_bin_srme_10m.tif',
    'usfs_treemap16_bin_wrnf_10m.tif',
    'usfs_itsp_aspen_ba_gt10_srme_10m.tif',
    'usfs_itsp_aspen_ba_gt10_wrnf_10m.tif',
]

# Spatial block grid (reprojected to `proj`) and the identifier of every block
blocks = gpd.read_file('spatial_block_grid_w_attr.gpkg').to_crs(proj)
block_ids = blocks['grid_id'].tolist()
print(f'Block IDs: {block_ids}')

# Functions

def blockmax(inarr, blocksize):
    """Aggregate a 2-D array by taking the maximum of each square tile.

    The array is zero-padded along its bottom and right edges until both
    dimensions are multiples of ``blocksize``; it is then cut into
    ``blocksize`` x ``blocksize`` tiles and each tile is reduced to its
    maximum value.

    Parameters
    ----------
    inarr : numpy.ndarray
        Two-dimensional input array.
    blocksize : int
        Edge length of the square tiles, in pixels.

    Returns
    -------
    numpy.ndarray
        Array with one value per tile, of shape
        ``(ceil(rows / blocksize), ceil(cols / blocksize))``.
    """
    # Rows / columns still missing to reach the next multiple of blocksize
    extra_rows = (-inarr.shape[0]) % blocksize
    extra_cols = (-inarr.shape[1]) % blocksize

    if extra_rows or extra_cols:
        padded = np.pad(
            inarr,
            ((0, extra_rows), (0, extra_cols)),
            mode='constant',
            constant_values=0,
        )
    else:
        padded = inarr

    tiles_down = padded.shape[0] // blocksize
    tiles_across = padded.shape[1] // blocksize

    # Axes 1 and 3 are the within-tile row / column offsets
    tiled = padded.reshape(tiles_down, blocksize, tiles_across, blocksize)
    return tiled.max(axis=(1, 3))

print("Complete")

Block IDs: ['-116+40', '-116+41', '-117+40', '-117+41', '-117+42', '-117+43', '-117+45', '-117+46', '-117+47', '-118+39', '-118+40', '-118+41', '-118+42', '-118+43', '-118+44', '-118+45', '-118+46', '-118+47', '-119+39', '-119+40', '-119+41', '-119+42', '-119+43', '-119+44', '-119+45', '-119+46', '-119+47', '-120+39', '-120+40', '-120+41', '-120+42', '-120+43', '-120+44', '-120+45', '-120+46', '-121+41', '-121+42', '-121+43', '-121+44', '-122+41', '-122+42', '-122+43']
Complete


In [None]:
# Check the dimensions of the input data to ensure they match
# Every reference grid is compared against the test grid of its ROI
# (resolution, bounds and shape). A reference that differs is reprojected
# onto the test grid and written back to disk.
for roi in rois:

    # Test raster for this ROI
    test_file_paths = [path for path in tests if f"{roi}.tif" in path]
    print(test_file_paths[0])
    test = rxr.open_rasterio(test_file_paths[0], cache=False).squeeze()

    # Reference rasters for this ROI
    ref_file_paths = [path for path in refs if f"{roi}_10m.tif" in path]
    print(ref_file_paths)

    # Check that they match with the aspen surfaces
    for ref in ref_file_paths:
        print(os.path.basename(ref))

        # Open the reference that is being checked in this iteration
        # (previously the first reference was opened every time).
        ref_ = rxr.open_rasterio(ref, cache=False).squeeze()

        grids_match = (
            test.rio.resolution() == ref_.rio.resolution()
            and test.rio.bounds() == ref_.rio.bounds()
            and test.shape == ref_.shape
        )

        if grids_match:
            print("Ref and Test match ...")
            del ref_
            continue

        print("Mismatch between ref and test ...")

        print(f"Shape of test: {test.shape}\nShape of ref: {ref_.shape}")
        print(f"Resolution of test: {test.rio.resolution()}\nResolution of ref: {ref_.rio.resolution()}")
        print(f"Bounds of test: {test.rio.bounds()}\nBounds of ref: {ref_.rio.bounds()}")

        del ref_

        print(f"Matching reference image to test image for {os.path.basename(ref)}")
        img = rxr.open_rasterio(ref, masked=True, cache=False).squeeze()
        # Masked (NaN) cells become 0 before the integer cast
        img = img.fillna(0).astype(np.uint16)
        img_match = img.rio.reproject_match(test)

        # NOTE(review): for a ".tif" input this resolves to the input path
        # itself, i.e. the reference file is overwritten in place - confirm
        # that this is intended.
        out_path = ref[:-4] + ".tif"
        print(out_path)
        img_match.rio.to_raster(
            out_path, tiled=True, lock=threading.Lock(), windowed=True,
            compress='zstd', zstd_level=9, num_threads='all_cpus',
            dtype='uint16', driver='GTiff'
        )

        del img, img_match

    del test

In [None]:
# Run the workflow for each ROI
#
# For every ROI the test map is compared against each reference grid at each
# window size in `blocksizes`. SRME is evaluated per spatial block, WRNF over
# the full raster. Results accumulate in one table per ROI, which is written
# to "global_accmeas_multi_blocks_full_<roi>.csv".

# Filter runtime warnings (we get a cast warning but it is benign)
warnings.filterwarnings("ignore", category=RuntimeWarning)


def _confusion_counts(ref_arr, test_arr, blocksize):
    """Count TP / FP / FN between a reference and a test array at one window size.

    For ``blocksize > 1`` both arrays are first aggregated with ``blockmax``.
    Cells are compared by exact value: 1 is presence, 0 is absence, and any
    other value is counted in none of the three categories.

    Returns
    -------
    list
        ``[blocksize, tp, fp, fn]``

    Raises
    ------
    ValueError
        If the (aggregated) reference and test arrays differ in shape.
    """
    if blocksize > 1:
        ref_res = blockmax(ref_arr, blocksize)
        test_res = blockmax(test_arr, blocksize)
    else:
        ref_res, test_res = ref_arr, test_arr

    # Print the shapes for debugging
    print(f"Blocksize {blocksize}: Reference - {ref_res.shape}, Test - {test_res.shape}")

    if ref_res.shape != test_res.shape:
        raise ValueError(
            f"Reference and test arrays have different shapes: {ref_res.shape} vs {test_res.shape}")

    # Count directly on boolean masks; no flattened DataFrame is needed
    ref_pos, test_pos = ref_res == 1, test_res == 1
    tp = int(np.count_nonzero(ref_pos & test_pos))
    fp = int(np.count_nonzero((ref_res == 0) & test_pos))
    fn = int(np.count_nonzero(ref_pos & (test_res == 0)))
    print(blocksize, tp, fp, fn)
    return [blocksize, tp, fp, fn]


def _accuracy_table(counts, source):
    """Build a table of counts with precision and recall for one reference.

    ``counts`` is a list of ``[blocksize, tp, fp, fn]`` rows and ``source`` is
    the name stored in the ``source`` column.
    """
    table = pd.DataFrame(counts, columns=['blocksize', 'tp', 'fp', 'fn'])
    # float64 is required: float16 overflows to inf above 65504, which
    # would turn the ratios of large pixel counts into 0.
    table['prec'] = table.tp / (table.tp + table.fp).astype(np.float64)
    table['rec'] = table.tp / (table.tp + table.fn).astype(np.float64)
    table['source'] = source
    return table


begin = time.time()

blocksizes = [1, 3]  # block sizes (in pixel) used as analytical units.

for roi in rois:

    print(f"Starting for {roi}")

    # List the test/ref images
    test_path = [path for path in tests if f"{roi}.tif" in path]
    print(test_path[0])

    # Open the test image (Sentinel-based map)
    test_img = rxr.open_rasterio(test_path[0], masked=True, cache=False).astype(rasterio.uint8).squeeze()

    ref_file_paths = [path for path in refs if f"{roi}_10m.tif" in path]
    print(ref_file_paths)

    # Loop through reference images; out_refs collects one table per
    # reference (WRNF) or per reference and spatial block (SRME)
    out_refs = []
    for ref_tif in ref_file_paths:

        print(ref_tif)

        ref_img = rxr.open_rasterio(ref_tif, masked=True, cache=False).astype(rasterio.uint8).squeeze()

        name = os.path.basename(ref_tif)[:-4]
        print(name)

        # If SRME, run by spatial block
        if 'srme' in roi:

            print('Starting analysis by spatial block unit for the SRME ...')

            for bid in block_ids:

                # Grab the block area
                cl = blocks[blocks.grid_id == bid]

                # Clip the images to the block area
                test_arr = test_img.rio.clip(cl.geometry).values
                ref_arr = ref_img.rio.clip(cl.geometry).values

                counts = [_confusion_counts(ref_arr, test_arr, size) for size in blocksizes]

                # Free up some space
                del test_arr, ref_arr

                block_df = _accuracy_table(counts, name)
                block_df['grid_id'] = bid
                out_refs.append(block_df)

        else:
            print("No block analysis for WRNF ...")

            test_arr = test_img.values
            ref_arr = ref_img.values

            counts = [_confusion_counts(ref_arr, test_arr, size) for size in blocksizes]

            # Free the arrays only after every block size has been processed
            del test_arr, ref_arr

            ref_df = _accuracy_table(counts, name)
            ref_df.to_csv(f'global_accmeas_multi_blocks_{name}.csv', index=False)
            out_refs.append(ref_df)

        del ref_img

        # Bind the results together for plotting. Rewritten after every
        # reference so partial results survive an interrupted run.
        outdfs = pd.concat(out_refs).reset_index(drop=True)
        outdfs.to_csv(f"global_accmeas_multi_blocks_full_{roi}.csv", index=False)

    del test_img

print("Complete!")

print(time.time() - begin)

Starting for srme
s2aspen_prob_10m_binOpt_srme.tif
['lc16_evt_200_bin_srme_10m.tif', 'usfs_treemap16_bin_srme_10m.tif', 'usfs_itsp_aspen_ba_gt10_srme_10m.tif']
lc16_evt_200_bin_srme_10m.tif
lc16_evt_200_bin_srme_10m
Starting analysis by spatial block unit for the SRME ...
Blocksize 1: Reference - (10680, 8779), Test - (10680, 8779)
Creating data frame
1 0 2 468
Blocksize 3: Reference - (3560, 2927), Test - (3560, 2927)
Creating data frame
3 1 1 116
Blocksize 1: Reference - (10676, 8684), Test - (10676, 8684)
Creating data frame
1 1410 3864 7245
Blocksize 3: Reference - (3559, 2895), Test - (3559, 2895)
Creating data frame
3 399 685 2100
Blocksize 1: Reference - (10746, 8866), Test - (10746, 8866)
Creating data frame
1 780 11475 24072
Blocksize 3: Reference - (3582, 2956), Test - (3582, 2956)
Creating data frame
3 252 3971 4091
Blocksize 1: Reference - (10742, 8772), Test - (10742, 8772)
Creating data frame
1 197538 744135 408055
Blocksize 3: Reference - (3581, 2924), Test - (3581, 2924