# Remove NaN columns in GeoTIFF files

This code was necessary to fix a few images that included an extra column filled with NaN values. The extra columns (on the left or right side of the image) were created by Google Earth Engine. Not sure why this happened, but it didn't impact the bounding box of the image. This was probably caused by a floating-point calculation when converting between CRSs.

In [1]:
import rasterio
import numpy as np

In [63]:
def remove_nan_column(geotiff_path, output_path, remove='first'):
    """
    Remove the first or last column from all bands in a GeoTIFF file.

    The trimmed raster is written to ``output_path`` using the source
    dataset's metadata, with the width (and, when the first column is
    dropped, the geotransform) adjusted. Band descriptions are copied over.
    The column is dropped unconditionally; its contents are not inspected.

    Args:
        geotiff_path (str): Path to the input GeoTIFF file.
        output_path (str): Path to the output GeoTIFF file.
        remove (str): 'first' to remove the first column, 'last' to remove the last column.

    Raises:
        ValueError: If ``remove`` is neither 'first' nor 'last', or if the
            input raster has fewer than two columns.
    """
    # Validate up front so a bad argument fails before any file is opened or read.
    if remove not in ('first', 'last'):
        raise ValueError("Invalid value for 'remove'. Use 'first' or 'last'.")

    # Open the GeoTIFF file for reading
    with rasterio.open(geotiff_path) as src:
        # Read the original image data
        image_data = src.read()  # shape: (num_bands, height, width)

        # Get band descriptions (band names)
        band_descriptions = src.descriptions

        # Dropping a column from a raster narrower than two columns would
        # leave nothing to write.
        if image_data.shape[2] < 2:
            raise ValueError(
                "Input raster must have at least two columns to remove one."
            )

        if remove == 'first':
            new_image_data = image_data[:, :, 1:]  # Remove first column
            # The raster origin moves one pixel to the right, so shift the
            # transform by one column in pixel space.
            new_transform = src.transform * rasterio.Affine.translation(1, 0)
        else:
            new_image_data = image_data[:, :, :-1]  # Remove last column
            # The origin (upper-left corner) is unchanged.
            new_transform = src.transform

        # Update metadata. Band descriptions are deliberately NOT placed here:
        # these keys are forwarded to rasterio.open() as keyword arguments, and
        # descriptions are set on the open dataset below instead.
        new_meta = src.meta.copy()
        new_meta.update({
            "width": new_image_data.shape[2],  # New width
            "transform": new_transform,  # Adjust the transform for geospatial coordinates
        })

        # Write the modified image data to a new GeoTIFF file
        with rasterio.open(output_path, 'w', **new_meta) as dst:
            dst.write(new_image_data)
            # Set the descriptions to maintain band names
            dst.descriptions = band_descriptions

    print(f"Updated GeoTIFF saved to {output_path}")


In [115]:
# Example usage: trim the trailing column from one training image and its mask.
# NOTE: `id` shadows the Python builtin of the same name; the name is left as-is here.
id = 182
geotiff_path = f'/Volumes/Macintosh HD/Users/fortin/Downloads/Sentinel_AllBands_Training_Id_{id}.tif'
geo_output_path = f'/Volumes/Macintosh HD/Users/fortin/Downloads/Sentinel_AllBands_Training_Id_{id}_fix.tif'
mask_path = f'/Volumes/Macintosh HD/Users/fortin/Downloads/masks/Mask_Buffer50m_Id_{id}.tif'
mask_output_path = f'/Volumes/Macintosh HD/Users/fortin/Downloads/masks/Mask_Buffer50m_Id_{id}_fix.tif'

# Image first, then its mask. Pass remove='first' instead when the extra
# column sits on the left edge.
for source_path, fixed_path in (
    (geotiff_path, geo_output_path),
    (mask_path, mask_output_path),
):
    remove_nan_column(source_path, fixed_path, remove='last')

Updated GeoTIFF saved to /Volumes/Macintosh HD/Users/fortin/Downloads/Sentinel_AllBands_Training_Id_182_fix.tif
Updated GeoTIFF saved to /Volumes/Macintosh HD/Users/fortin/Downloads/masks/Mask_Buffer50m_Id_182_fix.tif
