In [13]:
import rasterio
from rasterio.merge import merge
from rasterio.mask import mask
import geopandas as gpd
import os
from tqdm import tqdm

In [14]:
def find_tif_files_recursive(root_dir):
    """Collect the paths of all ``.tif`` files below ``root_dir``.

    The directory tree is traversed with ``os.walk``. A file is kept when its
    name ends with ``.tif``, compared case-insensitively (so ``.TIF`` matches,
    but ``.tiff`` does not).

    Parameters
    ----------
    root_dir : str
        Directory at which the recursive search starts.

    Returns
    -------
    list of str
        ``os.path.join(dirpath, filename)`` for every match, in the order
        ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(".tif")
    ]

In [18]:
def merge_and_clip_tifs(tif_paths, shapefile_path, output_path):
    """Mosaic several rasters, clip the mosaic to a shapefile, and save it.

    The rasters are merged with ``rasterio.merge.merge``, the shapefile
    geometries are reprojected to the CRS of the first raster, and the mosaic
    is clipped (cropped) to those geometries with ``rasterio.mask.mask``.
    The result is written as a GeoTIFF.

    Parameters
    ----------
    tif_paths : iterable of str
        Paths of the rasters to merge. Must contain at least one path.
    shapefile_path : str
        Path of the vector file (read with ``geopandas.read_file``) whose
        geometries define the clip area.
    output_path : str
        Path of the clipped GeoTIFF to write.

    Raises
    ------
    ValueError
        If ``tif_paths`` is empty.
    """
    # Materialize once so emptiness can be checked and the paths iterated safely.
    tif_paths = list(tif_paths)
    if not tif_paths:
        raise ValueError("tif_paths must contain at least one raster path")

    src_files_to_mosaic = []
    try:
        # Open all tif files with tqdm progress bar
        print("Opening TIF files...")
        for fp in tqdm(tif_paths, desc="Reading TIFs"):
            src_files_to_mosaic.append(rasterio.open(fp))

        # Merge into a single raster
        print("Merging rasters...")
        mosaic, mosaic_transform = merge(src_files_to_mosaic)

        # Capture everything still needed from the first source before the
        # datasets are closed below.
        mosaic_meta = src_files_to_mosaic[0].meta.copy()
        raster_crs = src_files_to_mosaic[0].crs
    finally:
        # Always release the file handles, even if opening or merging failed;
        # the original version left every dataset open.
        for src in src_files_to_mosaic:
            src.close()

    # Update the copied metadata so it describes the mosaic
    mosaic_meta.update({
        "driver": "GTiff",
        "height": mosaic.shape[1],
        "width": mosaic.shape[2],
        "transform": mosaic_transform,
        "count": mosaic.shape[0]
    })

    # Load shapefile and get geometry
    print("Reading shapefile...")
    shape = gpd.read_file(shapefile_path)
    shape = shape.to_crs(raster_crs)  # Match CRS
    geometry = shape.geometry.values

    # Clip the mosaic. mask() needs a dataset, so the in-memory mosaic array
    # is first written to a temporary in-memory raster.
    print("Clipping mosaic...")
    with rasterio.io.MemoryFile() as memfile:
        with memfile.open(**mosaic_meta) as dataset:
            dataset.write(mosaic)
            clipped, clipped_transform = mask(dataset=dataset, shapes=geometry, crop=True)

    # Update metadata for clipped output
    clipped_meta = mosaic_meta.copy()
    clipped_meta.update({
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": clipped_transform
    })

    # Write the clipped image to file
    print("Writing output raster...")
    with rasterio.open(output_path, "w", **clipped_meta) as dest:
        dest.write(clipped)

    print(f"Saved clipped raster to: {output_path}")

In [26]:
def switch_backslashes_to_slashes(strings):
    """Return a new list with every backslash in each string replaced by ``/``.

    Parameters
    ----------
    strings : iterable of str
        Strings (here: file paths) to convert.

    Returns
    -------
    list of str
        The converted strings, in input order.
    """
    converted = []
    for text in strings:
        converted.append(text.replace("\\", "/"))
    return converted

In [34]:
# Discover every NAIP tile under the project folder and normalize the paths
# to forward slashes.
naip_root = "I:/terrain_generation_project/NAIP"
tif_paths = switch_backslashes_to_slashes(find_tif_files_recursive(naip_root))

# List the discovered tiles, numbered from 1.
print("File paths for merging:")
for index, tif_path in enumerate(tif_paths, start=1):
    print(f"TIF {index}: {tif_path}")

File paths for merging:
TIF 1: I:/terrain_generation_project/NAIP/m_3411855_ne_11_060_20220505/m_3411855_ne_11_060_20220505.tif
TIF 2: I:/terrain_generation_project/NAIP/m_3411855_nw_11_060_20220505/m_3411855_nw_11_060_20220505.tif
TIF 3: I:/terrain_generation_project/NAIP/m_3411855_se_11_060_20220511/m_3411855_se_11_060_20220511.tif
TIF 4: I:/terrain_generation_project/NAIP/m_3411855_sw_11_060_20220511/m_3411855_sw_11_060_20220511.tif
TIF 5: I:/terrain_generation_project/NAIP/m_3411856_ne_11_060_20220507/m_3411856_ne_11_060_20220507.tif
TIF 6: I:/terrain_generation_project/NAIP/m_3411856_nw_11_060_20220505/m_3411856_nw_11_060_20220505.tif
TIF 7: I:/terrain_generation_project/NAIP/m_3411856_se_11_060_20220512/m_3411856_se_11_060_20220512.tif
TIF 8: I:/terrain_generation_project/NAIP/m_3411856_sw_11_060_20220512/m_3411856_sw_11_060_20220512.tif


Because merging and clipping the TIF files is computationally expensive, we performed the operations in stages rather than all at once, changing the inputs for each run. Note that the following cells were re-run for each stage, so their current state shows only the last iteration.

In [46]:
# Inputs for the current (last) iteration, originally labelled "3 + 4":
# the two intermediate mosaics are combined into one raster covering
# tiles 1-8.
output_tif = "I:/terrain_generation_project/NAIP_processed/naip_tiles_1_8.tif"
clip_shapefile = "I:/terrain_generation_project/tif_extent/tif_extent.shp"

tif_list = [
    "I:/terrain_generation_project/NAIP_processed/naip_tiles_1_4.tif",
    "I:/terrain_generation_project/NAIP_processed/naip_tiles_5_8.tif",
]

In [47]:
# Run the merge-and-clip step on the inputs configured above.
merge_and_clip_tifs(
    tif_paths=tif_list,
    shapefile_path=clip_shapefile,
    output_path=output_tif,
)

Opening TIF files...


Reading TIFs: 100%|██████████| 2/2 [00:00<00:00, 15.19it/s]

Merging rasters...



  data = src.read(


Reading shapefile...
Clipping mosaic...
Writing output raster...
Saved clipped raster to: I:/terrain_generation_project/NAIP_processed/naip_tiles_1_8.tif
