In [None]:
import json
import re
import tempfile
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import rasterio as rio
from matplotlib_scalebar.scalebar import ScaleBar
from rasterio.plot import show
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

In [None]:
# Side length of the (square) matplotlib figure, in inches, and the PDF unit
# conversion used to size it on the page.
_FIGURE_SIZE_IN = 9
_POINTS_PER_INCH = 72


def _format_capture_date(stem):
    """Return the date embedded in a file stem as 'DD Mon YYYY'.

    The first run of 14 consecutive digits is located and its first 8 digits
    are read as YYYYMMDD. "Unknown Date" is returned when there is no such run
    or when those digits do not form a real calendar date.
    """
    match = re.search(r"\d{14}", stem)
    if match is None:
        return "Unknown Date"
    try:
        parsed = datetime.strptime(match.group(0)[:8], "%Y%m%d")
    except ValueError:
        # 14 digits that are not a date (e.g. some other numeric id) should
        # not abort the whole PDF.
        return "Unknown Date"
    return parsed.strftime("%d %b %Y")


def _render_image_png(image_path, png_path, title, pixel_size):
    """Plot one raster with a title and scale bar and save it to png_path."""
    fig, ax = plt.subplots(figsize=(_FIGURE_SIZE_IN, _FIGURE_SIZE_IN))
    try:
        ax.set_title(title, pad=20)
        # The dataset is only needed while show() reads the pixels.
        with rio.open(image_path) as src:
            # with_bounds=False to not alter aspect ratio (axes stay in
            # pixel coordinates, which is what the scale bar's dx refers to).
            show(src, ax=ax, with_bounds=False)

        ax.add_artist(ScaleBar(pixel_size, units="m", location="lower right"))
        ax.set_axis_off()

        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(png_path, dpi=100)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


def add_images_to_pdf(images_folder, pdf_path, gdf, *, pixel_size=0.5, margin=36):
    """Plot all ``*.tif`` images in a directory to a PDF, one image per A4 page.

    Images are ordered by the integer that follows the last underscore in the
    file stem (``..._<id>.tif``). Each page shows the image with a scale bar
    and a title built from that id, the matching business name in ``gdf``
    (when a row with that ``image_id`` exists) and the date parsed from the
    file name.

    Parameters
    ----------
    images_folder : str or Path
        Directory searched (non-recursively) for ``*.tif`` files.
    pdf_path : str or Path
        Destination of the generated PDF.
    gdf : GeoDataFrame
        Must provide the columns ``image_id`` and ``USER_Name_of_business``.
    pixel_size : float, keyword-only
        Size of one image pixel in metres, used for the scale bar.
    margin : float, keyword-only
        Minimum blank border, in points, kept between the image and the page
        edge.

    Raises
    ------
    ValueError
        If a file stem does not end in ``_<integer>``, or if ``margin`` leaves
        no room for the image on the page.
    """
    c = canvas.Canvas(str(pdf_path), pagesize=A4)
    page_width, page_height = A4

    images = list(Path(images_folder).glob("*.tif"))
    images.sort(key=lambda img: int(img.stem.split("_")[-1]))

    # The rendered figure is square. Drawing it at its nominal 9 in (648 pt)
    # overflows the ~595 pt width of A4, so shrink it to fit inside the margins.
    side = min(
        _FIGURE_SIZE_IN * _POINTS_PER_INCH,
        page_width - 2 * margin,
        page_height - 2 * margin,
    )
    if side <= 0:
        raise ValueError(f"margin={margin} leaves no room for the image on the page")
    x_position = (page_width - side) / 2
    y_position = (page_height - side) / 2

    # Intermediate PNGs go to a private temporary directory so that nothing in
    # images_folder is overwritten or deleted, and so they are cleaned up even
    # when an error interrupts the loop.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for image_path in images:
            formatted_date = _format_capture_date(image_path.stem)

            # Extract image_id from the file name (assuming format "_<id>.tif")
            image_id = image_path.stem.split("_")[-1]

            # Filter the GeoDataFrame for the current image_id
            gdf_filtered = gdf[gdf["image_id"] == int(image_id)]
            if not gdf_filtered.empty:
                business_name = gdf_filtered["USER_Name_of_business"].iloc[0]
                title = f"{image_id} - {business_name} ({formatted_date})"
            else:
                title = f"{image_id} ({formatted_date})"

            # Keep a distinct file name per image: ReportLab keys images that
            # are passed by path on the path string, so a reused name could
            # make later pages repeat the first image.
            temp_png_path = Path(tmp_dir) / f"{image_path.stem}.png"
            _render_image_png(image_path, temp_png_path, title, pixel_size)

            # Draw the image centered on the page
            c.drawImage(
                str(temp_png_path),
                x_position,
                y_position,
                width=side,
                height=side,
                preserveAspectRatio=True,
            )
            c.showPage()

            # Already embedded in the PDF; drop it now to keep disk use flat.
            temp_png_path.unlink()

    c.save()

In [None]:
# Read the run configuration, then resolve the output directory and load the
# centre point layer that supplies the plot title info
config_path = Path("config.json")
with config_path.open("r") as file:
    config = json.load(file)

# Both locations come straight from the configuration file
output_folder = Path(config["output_dir"])
input_file_gdb = Path(config["input_gdb"])

sites_gdf = gpd.read_file(
    input_file_gdb,
    layer="Registered_Sites_Merged_v2",
)

In [None]:
# Folder holding the extracted GeoTIFFs and the PDF they are written into
image_folder = output_folder / "extracted_images"
pdf_path = output_folder / "output_images.pdf"

# GeoTIFFs currently in that folder (add_images_to_pdf does its own lookup,
# so this list is not passed on)
images = [*image_folder.glob("*.tif")]

# Build the PDF
add_images_to_pdf(image_folder, pdf_path, sites_gdf)