<a href="https://colab.research.google.com/github/liangchow/zindi-amazon-secret-runway/blob/main/utils/Generate_airstrip_masks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Setup.

In [1]:
%%capture
!pip -q install rasterio
!pip -q install gdown

In [2]:
import rasterio
import geopandas as gpd
from rasterio.features import rasterize
import numpy as np
import os
from shapely.geometry import LineString
from shapely.ops import transform
from pyproj import Transformer
import gdown
import shutil

## Create folder to store masks in Colab Runtime
This folder is deleted at the end of your session.

In [3]:
# Directory inside the Colab runtime where the generated masks are written
working_path = '/content/masks'

# Report an existing folder as-is; otherwise create it (including parents)
if os.path.exists(working_path):
    print(f"Folder already exists: {working_path}")
else:
    os.makedirs(working_path)
    print(f"Folder created: {working_path}")

Folder created: /content/masks


# Download data to local compute node

In [4]:
# Mount Google Drive at /content/drive so the training images stored there
# can be reached from this runtime (requires authorising access when prompted).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Download training images from your Google Drive

**Note**: If you are working with your own data, edit the path to your training images in the cell below.

In [5]:
# Navigate to the shared directory
%cd /content/drive/MyDrive/Zindi-Amazon/training

# Zip the data
!zip -r /content/images.zip images

# Unzip the files
!unzip /content/images.zip -d /content

# Navigate back to content
%cd /content

/content/drive/.shortcut-targets-by-id/14mw0v8Bi-MzhsqSI0K3KO23YrUHttM7P/Zindi-Amazon/training
  adding: images/ (stored 0%)
  adding: images/Sentinel_AllBands_Training_Id_136.tif (deflated 49%)
  adding: images/Sentinel_AllBands_Training_Id_137.tif (deflated 49%)
  adding: images/Sentinel_AllBands_Training_Id_138.tif (deflated 48%)
  adding: images/Sentinel_AllBands_Training_Id_139.tif (deflated 48%)
  adding: images/Sentinel_AllBands_Training_Id_143.tif (deflated 49%)
  adding: images/Sentinel_AllBands_Training_Id_147.tif (deflated 49%)
  adding: images/Sentinel_AllBands_Training_Id_182.tif (deflated 48%)
  adding: images/Sentinel_AllBands_Training_Id_114.tif (deflated 5%)
  adding: images/Sentinel_AllBands_Training_Id_71.tif (deflated 5%)
  adding: images/Sentinel_AllBands_Training_Id_151.tif (deflated 5%)
  adding: images/Sentinel_AllBands_Training_Id_57.tif (deflated 5%)
  adding: images/Sentinel_AllBands_Training_Id_98.tif (deflated 4%)
  adding: images/Sentinel_AllBands_Training

## Connect to team GitHub Repo

Clone the main branch of the TerraPulse team's GitHub repo to access its data files.

In [6]:
# clone the main branch from GitHub to get all the data and files from there onto the current runtime session
!apt-get install git
!git clone https://github.com/liangchow/zindi-amazon-secret-runway.git
!git pull # pulls the latest changes from repo

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.11).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Cloning into 'zindi-amazon-secret-runway'...
remote: Enumerating objects: 503, done.[K
remote: Counting objects: 100% (115/115), done.[K
remote: Compressing objects: 100% (81/81), done.[K
remote: Total 503 (delta 68), reused 35 (delta 34), pack-reused 388 (from 1)[K
Receiving objects: 100% (503/503), 64.14 MiB | 22.75 MiB/s, done.
Resolving deltas: 100% (236/236), done.
fatal: not a git repository (or any of the parent directories): .git


## Load training airstrips

In [7]:
# Load the training airstrip geometries from the shapefile shipped in the cloned repo.
# The path assumes the repo was cloned into /content (see the clone cell above).
airstrips_gdf = gpd.read_file("/content/zindi-amazon-secret-runway/Data_Visualization/data/pac_2024_training/pac_2024_training.shp")

# Functions for creating binary masks

In [8]:
# Define buffer distance.
# The value is passed to `.buffer()` on the airstrip geometry after it has been reprojected
# to the raster CRS, so it is expressed in that CRS's units; the output file names label it
# as metres ("Mask_Buffer<distance>m_...").
buffer_distance = 20

In [9]:
def check_mask_values(mask):
    """
    Report whether a mask holds both background (0) and foreground (1) pixels.

    Only the presence of the values 0 and 1 is tested; any other values in the
    array are ignored.

    Args:
      mask (np.ndarray): Array to inspect, expected to hold 0s and 1s.

    Returns:
      bool: True only when at least one 0 and at least one 1 are present.

    Prints:
      One line describing which of the two values were found:
      - "The mask contains both 0 and 1."
      - "The mask contains only 0."
      - "The mask contains only 1."
      - "The mask does not contain 0 or 1."
    """

    # (zero present, one present) -> (message to print, value to return)
    outcomes = {
        (True, True): ("The mask contains both 0 and 1.", True),
        (True, False): ("The mask contains only 0.", False),
        (False, True): ("The mask contains only 1.", False),
        (False, False): ("The mask does not contain 0 or 1.", False),
    }

    zero_present = bool(np.any(mask == 0))
    one_present = bool(np.any(mask == 1))

    message, verdict = outcomes[(zero_present, one_present)]
    print(message)
    return verdict

In [10]:
def reproject_geometry(geometry, from_crs, to_crs):
    """
    Return a copy of `geometry` with its coordinates converted between two CRSs.

    Args:
      geometry (shapely.geometry.base.BaseGeometry): Geometry to convert (e.g., LineString, Polygon, Point).
      from_crs: CRS the geometry is currently in. Passed straight to `Transformer.from_crs`,
        e.g. an EPSG string such as 'EPSG:4326'; this notebook also passes the CRS objects
        read from the GeoDataFrame and the raster.
      to_crs: CRS to convert the geometry to, in the same accepted forms as `from_crs`.

    Returns:
      shapely.geometry.base.BaseGeometry: The geometry expressed in `to_crs`.
    """

    # always_xy=True keeps coordinates in x, y (lon, lat / easting, northing) order
    # regardless of the axis order declared by either CRS
    project = Transformer.from_crs(from_crs, to_crs, always_xy=True).transform

    # Apply the coordinate function to every vertex of the geometry
    reprojected = transform(project, geometry)
    return reprojected

# Create binary mask

The code below does the following for each airstrip:
- Check if there is a corresponding training image
- Reproject the airstrip polyline to the training image's CRS and buffer it by `buffer_distance` (expressed in the units of that CRS)
- Create a binary mask with the same dimensions as the training image
- Set all pixel values inside the buffer to 1 and outside the buffer to 0


In [11]:
# Iterate over each airstrip to create individual mask rasters.
# For every airstrip that has a matching training image, the geometry is reprojected to the
# image CRS, buffered by `buffer_distance`, rasterized onto the image grid (1 inside the
# buffer, 0 elsewhere) and written as a single-band GeoTIFF into `working_path`.

# The vector layer has one CRS for all rows, so look it up once instead of on every iteration
airstrip_crs = airstrips_gdf.crs  # Assuming the GeoDataFrame has a CRS

for idx, row in airstrips_gdf.iterrows():
    # Get the airstrip ID
    airstrip_id = row['id']

    # Check for an associated TIFF file
    img = f"/content/images/Sentinel_AllBands_Training_Id_{airstrip_id}.tif"

    if not os.path.exists(img):
        print(f"No TIFF file found for airstrip ID: {airstrip_id}")
        continue

    # A row without a usable geometry cannot be reprojected or buffered; skip it
    # instead of failing part-way through the run
    if row.geometry is None or row.geometry.is_empty:
        print(f"No geometry found for airstrip ID: {airstrip_id}")
        continue

    print(f"Creating mask for airstrip ID: {airstrip_id}")

    # Read the georeferencing needed to build the mask, then let the image close
    with rasterio.open(img) as img_src:
        # Raster CRS
        img_crs = img_src.crs
        # Raster metadata (to match extent and resolution)
        img_meta = img_src.meta.copy()
        # Affine transformation mapping pixel to CRS coordinates
        img_transform = img_src.transform
        # Raster dimensions
        img_width = img_src.width
        img_height = img_src.height
    print(f"Raster CRS: {img_crs}")

    # Reproject the airstrip to match the raster CRS
    reprojected_airstrip = reproject_geometry(row.geometry, airstrip_crs, img_crs)

    # Create a buffer around the reprojected airstrip
    buffered_airstrip = reprojected_airstrip.buffer(buffer_distance)

    # Burn the buffer into a new array: 1 where the airstrip buffer is present.
    # `rasterize` allocates the output itself, so no pre-initialised mask is needed.
    shape = [(buffered_airstrip, 1)]
    mask = rasterize(
        shapes=shape,
        out_shape=(img_height, img_width),
        transform=img_transform,
        fill=0,   # Assign 0 to areas outside the polygon
        dtype='uint8',
    )

    # Metadata for the output raster: same grid as the image, one uint8 band
    out_meta = {
        **img_meta,
        "count": 1,
        "dtype": "uint8",
        "nodata": None,  # Disable nodata (otherwise QGIS will not display the 0s)
    }

    # Prints whether the mask holds both classes (e.g. flags an all-zero mask)
    check_mask_values(mask)

    # Save the individual mask raster to the masks folder in the Colab runtime
    output_raster = f"{working_path}/Mask_Buffer{buffer_distance}m_Id_{airstrip_id}.tif"
    with rasterio.open(output_raster, "w", **out_meta) as dest:
        dest.write(mask, 1)

    print(f"Saved mask for airstrip ID {airstrip_id} as {output_raster}")

    # To also copy the mask to a shared Google Drive folder, define `shared_folder_path`
    # and uncomment the following lines
    # shutil.copy(output_raster, shared_folder_path)
    # print(f"Saved mask for airstrip ID {airstrip_id} {shared_folder_path}")

Creating mask for airstrip ID: 1
Raster CRS: EPSG:32719
The mask contains both 0 and 1.
Saved mask for airstrip ID 1 as /content/masks/Mask_Buffer20m_Id_1.tif
Creating mask for airstrip ID: 2
Raster CRS: EPSG:32719
The mask contains both 0 and 1.
Saved mask for airstrip ID 2 as /content/masks/Mask_Buffer20m_Id_2.tif
No TIFF file found for airstrip ID: 3
No TIFF file found for airstrip ID: 4
No TIFF file found for airstrip ID: 5
No TIFF file found for airstrip ID: 6
No TIFF file found for airstrip ID: 7
No TIFF file found for airstrip ID: 8
Creating mask for airstrip ID: 9
Raster CRS: EPSG:32719
The mask contains both 0 and 1.
Saved mask for airstrip ID 9 as /content/masks/Mask_Buffer20m_Id_9.tif
Creating mask for airstrip ID: 10
Raster CRS: EPSG:32719
The mask contains both 0 and 1.
Saved mask for airstrip ID 10 as /content/masks/Mask_Buffer20m_Id_10.tif
Creating mask for airstrip ID: 11
Raster CRS: EPSG:32719
The mask contains both 0 and 1.
Saved mask for airstrip ID 11 as /content/ma