### Masking MODIS Land Cover Rasters to Maranhao State Boundary

This code takes the rectangular GeoTIFFs from Google Earth Engine and masks them to the exact Maranhao shapefile boundary, removing all the data outside the state borders.

Input: Raw GeoTIFFs from GEE which is a rectangular bounding box
Output: Masked GeoTIFFs which are clipped to Maranhao boundary

In [1]:
# importing the necessary libraries
import os
import glob
import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
from tqdm import tqdm

In [2]:
# adding all the paths
shapefile_path = "/shared_space/BrazilSPEI/Maranhao_boundaries_EPSG/maranhao_boundary.shp"
input_dir = "/shared_space/BrazilSPEI/MODIS/Raw_Data" # raw GEE downloads
output_dir = "/shared_space/BrazilSPEI/MODIS/Masked_Data" 

# create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok = True)

In [3]:
# loading Maranhao boundary
ma = gpd.read_file(shapefile_path)
print(f"Shapefile CRS: {ma.crs}")
print(f"Bounds: {ma.total_bounds}")
print(f"Area: {ma.geometry.area.sum():.2f} square degrees")

Shapefile CRS: {'init': 'epsg:4326'}
Bounds: [-48.75515074 -10.2617647  -41.79606907  -1.04932812]
Area: 26.90 square degrees


In [4]:
# finding all the input files
input_files = sorted(glob.glob(os.path.join(input_dir, "*.tif")))
print(f"Found {len(input_files)} GeoTIFF files to process")

Found 24 GeoTIFF files to process


In [5]:
# processing each raster
print("Masking rasters to Maranhao boundary..")

stats = {
    'processed': 0,
    'skipped': 0,
    'errors': 0
}

for input_path in tqdm(input_files, desc = 'Processing'):
    filename = os.path.basename(input_path)
    output_path = os.path.join(output_dir, filename.replace(".tif", "_masked.tif"))
    
    with rasterio.open(input_path) as src:
        # checking if the CRS matches
        if src.crs != ma.crs:
            print(f"CRS mismatch for {filename}")
            print(f"Raster: {src.crs}, Shapefile: {ma.crs}")
            ma_projected = ma.to_crs(src.crs)
        else:
            ma_reprojected = ma
            
        # getting original stats
        original_shape = (src.height, src.width)
        original_bounds = src.bounds
        
        # masking to the shapefile boundary
        out_image, out_transform = mask(
            src,
            ma_reprojected.geometry,
            crop = True,
            filled = True,
            nodata = src.nodata if src.nodata is not None else 0
        )
        
        # getting masked stats
        masked_shape = (out_image.shape[1], out_image.shape[2])
        
        # updating metadata
        out_meta = src.meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform
        })
        
        # writing masked raster
        with rasterio.open(output_path, "w", **out_meta) as dest:
            dest.write(out_image)
            
        # calculating size reduction
        original_pixels = original_shape[0] * original_shape[1]
        masked_pixels = masked_shape[0] * masked_shape[1]
        reduction = ((original_pixels - masked_pixels) / original_pixels) * 100
        
        print(f"{filename} completed")
        print(f"Original: {original_shape[0]}x{original_shape[1]} pixels")
        print(f"Masked: {masked_shape[0]}x{masked_shape[1]} pixels")
        print(f"Reduction: {reduction:.1f}%")
        
        stats["processed"] += 1

Processing:   0%|          | 0/24 [00:00<?, ?it/s]

Masking rasters to Maranhao boundary..


Processing:   4%|▍         | 1/24 [00:00<00:04,  4.62it/s]

MCD12Q1_Maranhao_2001.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  12%|█▎        | 3/24 [00:00<00:04,  4.74it/s]

MCD12Q1_Maranhao_2002.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2003.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  21%|██        | 5/24 [00:01<00:03,  4.99it/s]

MCD12Q1_Maranhao_2004.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2005.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  29%|██▉       | 7/24 [00:01<00:03,  4.97it/s]

MCD12Q1_Maranhao_2006.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2007.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  38%|███▊      | 9/24 [00:01<00:02,  5.11it/s]

MCD12Q1_Maranhao_2008.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2009.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  46%|████▌     | 11/24 [00:02<00:02,  5.08it/s]

MCD12Q1_Maranhao_2010.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2011.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  54%|█████▍    | 13/24 [00:02<00:02,  5.16it/s]

MCD12Q1_Maranhao_2012.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2013.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  62%|██████▎   | 15/24 [00:02<00:01,  5.11it/s]

MCD12Q1_Maranhao_2014.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2015.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  71%|███████   | 17/24 [00:03<00:01,  5.14it/s]

MCD12Q1_Maranhao_2016.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2017.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  75%|███████▌  | 18/24 [00:03<00:01,  5.16it/s]

MCD12Q1_Maranhao_2018.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  88%|████████▊ | 21/24 [00:04<00:00,  5.16it/s]

MCD12Q1_Maranhao_2019.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2020.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2021.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing:  92%|█████████▏| 22/24 [00:04<00:00,  5.17it/s]

MCD12Q1_Maranhao_2022.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%


Processing: 100%|██████████| 24/24 [00:04<00:00,  5.09it/s]

MCD12Q1_Maranhao_2023.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%
MCD12Q1_Maranhao_2024.tif completed
Original: 2056x1550 pixels
Masked: 2052x1550 pixels
Reduction: 0.2%





In [6]:
print("Masking Complete")
print(f"Processed: {stats['processed']} files")
print(f"Skipped: {stats['skipped']} files")
print(f"Errors: {stats['errors']} files")
print(f"Output directory: {output_dir}")

Masking Complete
Processed: 24 files
Skipped: 0 files
Errors: 0 files
Output directory: /shared_space/BrazilSPEI/MODIS/Masked_Data


In [10]:
print("Verifying one masked file...")
test_files = glob.glob(os.path.join(output_dir, "*_masked.tif"))

if test_files:
    test_file = test_files[0]
    with rasterio.open(test_file) as src:
        data = src.read(1)
        print(f"Test file: {os.path.basename(test_file)}")
        print(f"Shape: {data.shape}")
        print(f"CRS: {src.crs}")
        print(f"Bounds: {src.bounds}")
        print(f"Unique values: {np.unique(data)}")
        print(f"NoData value: {src.nodata}")
else:
    print("No masked files found to verify")

print("All done!")

Verifying one masked file...
Test file: MCD12Q1_Maranhao_2002_masked.tif
Shape: (2052, 1550)
CRS: EPSG:4326
Bounds: BoundingBox(left=-48.75606204558703, bottom=-10.263252121065534, right=-41.79411859366074, top=-1.0465373059992427)
Unique values: [ 0  1  2  4  5  6  7  8  9 10 11 12 13 14 16 17]
NoData value: None
All done!
