### Identifying No-Data Chips

After chipping out a raster, any no-data areas in the original raster will result in chips consisting entirely of no-data. In this notebook, I figure out a way to programatically detect these no-data chips.

In [9]:
# Import statements

import numpy as np
import os
import rasterio as rio

In [3]:
# Need to get the locations of the chips to read them with Rasterio

root = 'D:/canopy_data/pipeline-products/raw_products/SAFE_ordered/1/' # Change this to whichever folder contains your rasters
tci_files = os.listdir(root)
tci_files[0]

'T33NTB_20191110T092119_TCI_10m.jp2'

In [4]:
# Attach the root to the result of os.listdir to get the full location

file1_uri = root + tci_files[0]
file1_uri

'D:/canopy_data/pipeline-products/raw_products/SAFE_ordered/1/T33NTB_20191110T092119_TCI_10m.jp2'

In [5]:
# Use rasterio to open the file

with rio.open(file1_uri) as file1:
    raster1 = file1.read()

In [6]:
# Look at the raster

raster1

array([[[ 55,  62,  63, ...,  27,  28,  28],
        [ 59,  61,  65, ...,  26,  26,  28],
        [ 61,  63,  62, ...,  27,  27,  29],
        ...,
        [255, 255, 255, ...,  23,  24,  26],
        [255, 255, 255, ...,  22,  24,  26],
        [255, 255, 255, ...,  24,  27,  32]],

       [[ 78,  81,  79, ...,  44,  46,  46],
        [ 78,  76,  78, ...,  51,  46,  43],
        [ 79,  79,  81, ...,  45,  45,  49],
        ...,
        [255, 255, 255, ...,  42,  39,  43],
        [255, 255, 255, ...,  38,  37,  40],
        [255, 255, 255, ...,  39,  43,  46]],

       [[ 66,  72,  68, ...,  31,  35,  35],
        [ 71,  71,  74, ...,  38,  34,  31],
        [ 71,  69,  70, ...,  32,  32,  36],
        ...,
        [255, 255, 255, ...,  33,  31,  35],
        [255, 255, 255, ...,  31,  33,  35],
        [255, 255, 255, ...,  30,  36,  38]]], dtype=uint8)

In [7]:
# Explore the raster a bit more

raster1.shape

(3, 10980, 10980)

In [8]:
raster1[0]

array([[ 55,  62,  63, ...,  27,  28,  28],
       [ 59,  61,  65, ...,  26,  26,  28],
       [ 61,  63,  62, ...,  27,  27,  29],
       ...,
       [255, 255, 255, ...,  23,  24,  26],
       [255, 255, 255, ...,  22,  24,  26],
       [255, 255, 255, ...,  24,  27,  32]], dtype=uint8)

In [10]:
# The goal is to find out if the raster is just no-data values.
# The problem is that, after exploring in QGIS, we found that a "1"
# often signifies a no-data value, along with a "0." So we can't just
# count the zeroes.
# Instead, with help from this site: https://note.nkmk.me/en/python-numpy-count/
# I decided to add up the values in each band of the raster, and see if any of
# the resultant sums were **greater than 3**. After all, if every value in a particular
# pixel was 0 or 1 across all three bands, that means it would (a) be a no-data pixel
# and (b) if you add the bands, the sum would be less than or equal to 3.

raster1_combined = raster1[0] + raster1[1] + raster1[2]
raster1_combined

array([[199, 215, 210, ..., 102, 109, 109],
       [208, 208, 217, ..., 115, 106, 102],
       [211, 211, 213, ..., 104, 104, 114],
       ...,
       [253, 253, 253, ...,  98,  94, 104],
       [253, 253, 253, ...,  91,  94, 101],
       [253, 253, 253, ...,  93, 106, 116]], dtype=uint8)

In [11]:
raster1_combined.shape

(10980, 10980)

In [12]:
raster1_combined <= 3

# This results in an array with only True and False, the False values being pixels
# that are **greater than 3**. In other words, every True value in this array should
# signify a no-data pixel.

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [13]:
# Calculate the percent of the raster that's no-data by counting the nonzeroes,
# then dividing by the total number of values (i.e., the rows times the columns).

rows = raster1_combined.shape[0]
columns = raster1_combined.shape[1]

np.count_nonzero(raster1_combined <= 3) / (rows * columns)

0.0027510525844307086

In [14]:
# Finally, I combine all that work into a single function.

def detect_missing_pixels(filename, na_value=3):
    # Read the raster with rasterio
    with rio.open(filename) as f:
        raster = f.read()
        
    # Add up the values in all three color bands
    raster_combined = raster[0] + raster[1] + raster[2]
    rows = raster_combined.shape[0]
    columns = raster_combined.shape[1]
    
    # Calculate how many values in the combined raster are less than the given NA Value
    # (defaulted to 3), then divide by the total number of values to get a percentage.
    percent = np.count_nonzero(raster_combined <= na_value) / (rows * columns)
    
    return percent

In [15]:
# Test the function

detect_missing_pixels(file1_uri)

# Success!

0.0027510525844307086