---
title: Sentinel-2 burn scar analysis
---

# Sentinel-2 burn scar analysis

> To Do:
> - [ ] Convert script to notebook
> - [ ] Integrate outputs/figures in document

In [None]:
import logging
from pathlib import Path

import boto3
import geopandas as gpd
import numpy as np
import pystac_client
import rasterio as rio
from rasterio.mask import mask


In [None]:
def search_sentinel_data(
    aoi_path,
    time_range=("2025-01-01", "2025-01-31"),
    collections=None,
    cloud_cover_threshold=10,
    stac_api_url="https://explorer.dea.ga.gov.au/stac/",
):
    """
    Search a STAC API for items intersecting an AOI with low cloud cover.

    Parameters:
    -----------
    aoi_path : str or path-like
        Vector file readable by ``geopandas.read_file``. Only the geometry
        of the FIRST feature is used for the spatial query.
    time_range : tuple of str
        ``(start, end)`` dates, joined as ``"start/end"`` for the STAC
        ``datetime`` filter.
    collections : list of str, optional
        Collection IDs to search. Defaults to
        ``["ga_s2am_ard_3", "ga_s2bm_ard_3", "ga_s2cm_ard_3"]``.
    cloud_cover_threshold : number
        Maximum cloud cover value (inclusive) for an item to be kept.
    stac_api_url : str
        Root URL of the STAC API.

    Returns:
    --------
    list
        Items whose cloud cover property is present and at or below the
        threshold. Items without any recognised cloud cover property are
        dropped.
    """
    # None sentinel instead of a mutable list in the signature.
    if collections is None:
        collections = ["ga_s2am_ard_3", "ga_s2bm_ard_3", "ga_s2cm_ard_3"]

    aoi = gpd.read_file(aoi_path)

    # The `intersects` geometry is sent as GeoJSON, so work in WGS84.
    if aoi.crs != "EPSG:4326":
        aoi = aoi.to_crs("EPSG:4326")

    geom = aoi.geometry.values[0].__geo_interface__

    catalog = pystac_client.Client.open(stac_api_url)
    search = catalog.search(
        collections=collections,
        datetime=f"{time_range[0]}/{time_range[1]}",
        intersects=geom,
        max_items=100,  # Adjust as needed
    )

    # Different collections might store cloud cover under different names;
    # the first name present on an item wins.
    cloud_cover_keys = ("eo:cloud_cover", "cloud_cover", "cloudy_pixel_percentage")

    def _cloud_cover(item):
        for key in cloud_cover_keys:
            if key in item.properties:
                return item.properties[key]
        return None

    # `items()` replaces the deprecated `get_all_items()`.
    filtered_items = []
    for item in search.items():
        cloud_cover = _cloud_cover(item)
        if cloud_cover is not None and cloud_cover <= cloud_cover_threshold:
            filtered_items.append(item)

    return filtered_items

In [None]:
def _clip_raster_to_aoi(raster_path, clipped_path, aoi):
    """Write a copy of ``raster_path`` cropped to the AOI geometries to ``clipped_path``."""
    with rio.open(raster_path) as src:
        # The mask geometries must be in the raster's own CRS.
        aoi_in_raster_crs = aoi.to_crs(src.crs)
        out_image, out_transform = mask(src, aoi_in_raster_crs.geometry, crop=True)
        out_meta = src.meta.copy()

    # out_image is (bands, rows, cols); record the cropped size and transform.
    out_meta.update(
        {
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
        }
    )

    with rio.open(clipped_path, "w", **out_meta) as dest:
        dest.write(out_image)


def download_sentinel_data(
    items, output_dir, bands=("red", "green", "blue", "nir"), aoi_path=None
):
    """
    Download band assets for STAC items from S3, optionally clipped to an AOI.

    Parameters:
    -----------
    items : iterable
        STAC items exposing ``id`` and ``assets`` (asset objects need ``href``).
    output_dir : str or path-like
        Directory for the GeoTIFFs; created if it does not exist.
    bands : iterable of str
        Asset keys to download for every item.
    aoi_path : str or path-like, optional
        Vector file readable by ``geopandas.read_file``. When given, each
        downloaded raster is clipped to it and the unclipped file is deleted.

    Returns:
    --------
    dict
        ``{item_id: {band: file_path}}`` for every band that was downloaded
        (and clipped, if requested) successfully. Failures are printed and
        skipped, so an item may map to an empty or partial dict.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # NOTE(review): uses boto3's default credential chain; anonymous access
    # to a public bucket may need an unsigned client config - confirm.
    s3 = boto3.client("s3")

    aoi = None
    if aoi_path:
        aoi = gpd.read_file(aoi_path)
        if aoi.crs != "EPSG:4326":
            aoi = aoi.to_crs("EPSG:4326")

    downloaded_files = {}

    for item in items:
        item_id = item.id
        item_files = downloaded_files[item_id] = {}

        for band in bands:
            if band not in item.assets:
                print(f"Band {band} not found in assets for item {item_id}")
                continue

            href = item.assets[band].href
            if not href.startswith("s3://"):
                print(f"Asset URL for {band} is not an S3 URL: {href}")
                continue

            # "s3://bucket/some/key" -> ("bucket", "some/key")
            bucket, _, key = href[len("s3://"):].partition("/")
            out_file = output_path / f"{item_id}_{band}.tif"

            try:
                s3.download_file(bucket, key, str(out_file))

                if aoi is not None:
                    clipped_file = output_path / f"{item_id}_{band}_clipped.tif"
                    _clip_raster_to_aoi(out_file, clipped_file, aoi)

                    # Delete the full-size download only after the dataset
                    # has been closed (an open file cannot be removed on
                    # Windows). Path.unlink also avoids the `os` module,
                    # which this file never imports.
                    out_file.unlink()
                    out_file = clipped_file
            except Exception as e:
                # Best effort: report the failure and carry on with the
                # remaining bands and items.
                print(f"Error downloading {band} band for {item_id}: {e}")
            else:
                item_files[band] = str(out_file)

    return downloaded_files

In [None]:
logger = logging.getLogger(__name__)


def create_rgb_composite(downloaded_files, output_dir, suffix="_rgb_composite"):
    """
    Create RGB composites from downloaded Sentinel-2 bands.

    Parameters:
    -----------
    downloaded_files : dict
        Dictionary of downloaded file paths by item ID and band, i.e.
        ``{item_id: {"red": path, "green": path, "blue": path, ...}}``.
    output_dir : str
        Directory to save composite files; created if it does not exist.
    suffix : str
        Suffix to add to the output file names.

    Returns:
    --------
    list
        List of paths to the created RGB composites. Items that lack one of
        the red/green/blue bands, or whose composite fails to build, are
        logged and left out.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    composites = []

    for item_id, bands in downloaded_files.items():
        if not all(b in bands for b in ("red", "green", "blue")):
            logger.warning("Not all RGB bands available for %s", item_id)
            continue

        logger.info("Creating RGB composite for %s", item_id)
        out_file = output_path / f"{item_id}{suffix}.tif"

        try:
            # `rio` is this file's alias for rasterio.
            with (
                rio.open(bands["red"]) as red_src,
                rio.open(bands["green"]) as green_src,
                rio.open(bands["blue"]) as blue_src,
            ):
                # Stack band 1 of each file, channels first for rasterio.
                rgb_data = np.stack(
                    [red_src.read(1), green_src.read(1), blue_src.read(1)]
                )

                # Reuse the red band's metadata, widened to three bands.
                out_meta = red_src.meta.copy()
                out_meta.update({"count": 3, "driver": "GTiff"})

            with rio.open(out_file, "w", **out_meta) as dest:
                dest.write(rgb_data)
        except Exception:
            # Best effort per item: log with traceback and keep going.
            logger.exception("Error creating RGB composite for %s", item_id)
        else:
            composites.append(str(out_file))
            logger.info("RGB composite created at %s", out_file)

    return composites