In [None]:
from ceda_sentinel.s2_ard_links import find_image_links
from ceda_sentinel.s2_ard_read_write import *
import pandas as pd

- Input a date range and a polygon layer for area(s) of interest
- Find Sentinel 2 tile names that intersect area(s) of interest
- Find CEDA Sentinel 2 ARD folders that are within that date range
- Find images that are not too cloudy overall
- Join links to intersecting images to area of interest polygons
- Clip the images that intersect the area(s) of interest polygons and save a GeoTIFF of the intersecting area

In [None]:
# Root URL of the CEDA archive under which the Sentinel-2 ARD images are found.
# It is passed to find_image_links as the starting point of the search.
base_url = "https://data.ceda.ac.uk/neodc/sentinel_ard/data/sentinel_2"

In [None]:
# Start and end of the date range over which to search for images, written as "YYYY-MM-DD" strings.
# A longer date range gives more chance of cloud-free coverage, but the search takes longer.
# NOTE(review): whether end_date itself is included depends on find_image_links — confirm.
start_date = "2023-05-01"
end_date = "2023-05-31"

In [None]:
# Read the area-of-interest sites from one layer of a file geodatabase.
# NOTE(review): this is an absolute path on a mapped drive (X:), so the cell only
# runs on a machine where that drive is available.
input_file_gdb = Path(
    "X:/OpenJobs/job6324rs/MXDs/job6324rs - Planning application boundaries trial/job6324rs - Planning application boundaries trial.gdb"
)
sites_gdf = gpd.read_file(input_file_gdb, layer="Registered_Sites_Merged_v2")
# Remove rows without usable geometry. is_empty only flags empty geometries and
# reports False for missing (None) ones, so missing geometries are filtered
# explicitly; otherwise they would fail the Point check in points_to_buffer_box.
sites_gdf = sites_gdf[sites_gdf.geometry.notna() & ~sites_gdf.geometry.is_empty]

In [None]:
# Define function to convert points -> buffer -> bounding box
def points_to_buffer_box(gdf, buffer_distance=500):
    """Buffer points by a distance and convert the buffers to bounding boxes.

    Parameters
    ----------
    gdf : GeoDataFrame
        Layer whose geometries must all be Points. It is not modified; the
        work is done on the reprojected copy returned by ``to_crs``.
    buffer_distance : float, default 500
        Buffer radius in the units of EPSG:27700 (metres).

    Returns
    -------
    GeoDataFrame or None
        A copy of ``gdf`` in EPSG:27700 whose geometry is the axis-aligned
        bounding box of each buffered point, or ``None`` (after printing a
        message) when any geometry is not a Point.
    """
    # Bail out early rather than buffering lines/polygons by mistake
    if not (gdf.geometry.geom_type == "Point").all():
        print("All geometries in the GeoDataFrame must be Points. Exiting.")
        return None

    # Reproject to EPSG 27700 so the buffer distance is applied in that CRS
    projected = gdf.to_crs(epsg=27700)

    # Buffer, then take the bounding rectangle of each buffer in one vectorised
    # step. Write back to the active geometry column by name so the result is
    # correct even when that column is not literally called "geometry".
    geometry_column = projected.geometry.name
    projected[geometry_column] = projected.geometry.buffer(buffer_distance).envelope

    return projected

In [None]:
# Buffer every site point by 500 and replace it with the bounding box of that buffer.
# points_to_buffer_box returns None when any geometry is not a Point, so a None
# result here means the input layer needs cleaning first.
aoi_box_gdf = points_to_buffer_box(sites_gdf, buffer_distance=500)

In [None]:
# Trial run only: keep the sites whose image_id is 50 or lower
aoi_box_gdf = aoi_box_gdf.loc[aoi_box_gdf["image_id"].le(50)]

In [None]:
# Process the AOI polygons:
# 1. Find a list of S2 image tile names that intersect the AOI
# 2. Find all CEDA S2 XML metadata file links within the date range containing those tile names.
# 3. Read each image metadata XML and if image not too cloudy extract image extent.
# 4. Spatial join image extent to AOI polygons. If more than one matching image, multiple rows are created in resulting geodataframe.
# Cloud cover in the XML metadata seems to be in the 0 - 1 range, so a threshold of 0.4
# (reportedly the default of find_image_links — not visible here) would keep images that are 40% cloudy or less.
# This call passes cloud_cover_max=0.1, i.e. only images with 10% cloud cover or less are retained.
aoi_image_gdf = find_image_links(
    aoi_box_gdf, start_date, end_date, base_url, cloud_cover_max=0.1
)

In [None]:
# Cache the search result as a pickle so the PDF plotting options can be tested
# without repeating the search. to_pickle does not create folders, so "outputs"
# must already exist relative to the working directory.
aoi_image_gdf.to_pickle("outputs/aoi_image_gdf.pkl")

In [None]:
# Reload the cached search result from the pickle.
# Only unpickle files you created yourself: loading a pickle can execute arbitrary code.
aoi_image_gdf = pd.read_pickle("outputs/aoi_image_gdf.pkl")

In [None]:
# Let's just take one image per id for now.
# drop_duplicates keeps the first complete row for each image_id. A
# groupby(...).first() instead takes the first non-null value of every column
# separately, which can blend values from different images into one row.
# Rows with a missing image_id are dropped and the result is ordered by image_id
# with a fresh 0..n-1 index; the stable sort keeps the original first row per id.
aoi_image_gdf = (
    aoi_image_gdf.dropna(subset=["image_id"])
    .sort_values("image_id", kind="mergesort")
    .drop_duplicates(subset="image_id", keep="first")
    .reset_index(drop=True)
)

In [None]:
# Plot one of the images for one AOI as a quick visual check.
# plot_row=0 selects which row of aoi_image_gdf to plot
# (presumably a positional row number — confirm against plot_sample_image).
plot_sample_image(aoi_image_gdf, plot_row=0)

In [None]:
# Write all window images to disk, by default they will be written to the outputs folder in this repo.
# Three bands are requested via band_idx_list=[1, 2, 3].
# NOTE(review): the PDF step reads an integer id from the start of each .tif file name,
# so the files written here presumably start with the aoi_id_column value — confirm
# against write_s2_windows_to_tif.
write_s2_windows_to_tif(
    aoi_image_gdf, band_idx_list=[1, 2, 3], aoi_id_column="image_id"
)

## Part two: create a PDF of the RGB images

In [None]:
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
import matplotlib.pyplot as plt
import rasterio as rio
from rasterio.plot import show
from matplotlib_scalebar.scalebar import ScaleBar
from pathlib import Path
from datetime import datetime
import re
import numpy as np


def _leading_image_id(image_path):
    """Return the integer before the first "_" of the file stem, or None if it is not an integer."""
    try:
        return int(image_path.stem.split("_")[0])
    except ValueError:
        return None


def _format_image_date(stem):
    """Format the date found in the first 14-digit run of ``stem`` as e.g. "01 May 2023".

    Only the first 8 digits (YYYYMMDD) of the run are used. Returns "Unknown Date"
    when there is no 14-digit run or its first 8 digits are not a valid date.
    """
    match = re.search(r"\d{14}", stem)
    if match is None:
        return "Unknown Date"
    try:
        return datetime.strptime(match.group(0)[:8], "%Y%m%d").strftime("%d %b %Y")
    except ValueError:
        return "Unknown Date"


def add_images_to_pdf(
    images_folder,
    pdf_path,
    gdf,
    id_column="image_id",
    name_column="USER_Name_of_business",
):
    """Plot every ``*.tif`` image in a directory to its own page of a PDF.

    Parameters
    ----------
    images_folder : str or Path
        Folder searched (non-recursively) for ``*.tif`` files. Each file name must
        start with an integer id followed by "_"; files that do not are skipped
        with a printed message. Pages are ordered by that id.
    pdf_path : str or Path
        Output PDF file.
    gdf : DataFrame or GeoDataFrame
        Table used to look up a display name for each image id.
    id_column : str, default "image_id"
        Column of ``gdf`` compared with the integer id from the file name.
    name_column : str, default "USER_Name_of_business"
        Column of ``gdf`` whose first matching value is shown in the page title.

    Notes
    -----
    Each figure is rendered to a temporary ``.png`` next to its ``.tif`` (same
    stem) and removed afterwards, so an existing PNG with that name is replaced
    and deleted.
    """
    c = canvas.Canvas(str(pdf_path), pagesize=A4)
    page_width, page_height = A4

    # Matplotlib figure size (inches) and the size it is drawn at on the page (72 pt per inch)
    figure_size_in = 9
    image_size_pt = figure_size_in * 72
    # NOTE(review): 9 in is 648 pt, wider than an A4 page (about 595 pt), so
    # x_position is negative and the figure's side margins fall off the page —
    # confirm this crop is intended.
    x_position = (page_width - image_size_pt) / 2
    y_position = (page_height - image_size_pt) / 2

    # Parse each id once; skip stray files instead of crashing the sort
    numbered_images = []
    for image_path in Path(images_folder).glob("*.tif"):
        image_id = _leading_image_id(image_path)
        if image_id is None:
            print(f"Skipping {image_path.name}: file name does not start with a numeric id.")
            continue
        numbered_images.append((image_id, image_path))
    numbered_images.sort(key=lambda item: item[0])

    for image_id, image_path in numbered_images:
        formatted_date = _format_image_date(image_path.stem)
        # The title shows the id exactly as written in the file name
        id_label = image_path.stem.split("_")[0]

        # Look up the display name for the current image id
        gdf_filtered = gdf[gdf[id_column] == image_id]
        if not gdf_filtered.empty:
            business_name = gdf_filtered[name_column].iloc[0]
            title = f"{id_label} - {business_name} ({formatted_date})"
        else:
            title = f"{id_label} ({formatted_date})"

        # Read the pixels, then release the file before plotting
        with rio.open(image_path) as src:
            img_arr = src.read()
        img_arr = np.clip(img_arr, a_min=None, a_max=255)

        temp_png_path = image_path.with_suffix(".png")
        try:
            fig, ax = plt.subplots(figsize=(figure_size_in, figure_size_in))
            try:
                ax.set_title(title, pad=20)
                # Reverse the first three bands for display
                # (assumes they are stored blue, green, red — TODO confirm);
                # with_bounds=False to not alter aspect ratio
                show(img_arr[[2, 1, 0], :, :], ax=ax, with_bounds=False, adjust=True)

                # Add a scale bar (assumes 10 m pixels — TODO confirm)
                scalebar = ScaleBar(
                    10, units="m", location="lower right", fixed_value=200
                )
                ax.add_artist(scalebar)

                ax.set_axis_off()

                fig.savefig(temp_png_path, dpi=100)  # Specify DPI for image quality
            finally:
                # Always release the figure, even if plotting or saving failed
                plt.close(fig)

            # Draw the image centered on the page
            c.drawImage(
                str(temp_png_path),
                x_position,
                y_position,
                width=image_size_pt,
                height=image_size_pt,
                preserveAspectRatio=True,
            )
            c.showPage()
        finally:
            # Never leave the temporary PNG behind, whatever happened above
            if temp_png_path.exists():
                temp_png_path.unlink()

    c.save()

In [None]:
# Folder holding the window GeoTIFFs, and the .tif files currently in it
image_folder = Path("outputs")
images = [*image_folder.glob("*.tif")]

# Where the PDF is written
pdf_path = image_folder.joinpath("sentinel_images.pdf")

# Build the PDF; page titles are looked up in sites_gdf
add_images_to_pdf(image_folder, pdf_path, sites_gdf)