In [2]:
from pathlib import Path
import fitz  # PyMuPDF


def extract_single_page_pdf(
    pdf_path,
    page_number: int,
    output_dir="data/test"
) -> Path:
  """
  Extract a single page from a PDF and save it as a new one-page PDF.

  The output file is named ``<source stem>--<page_number>.pdf`` and is
  written into ``output_dir``, which is created (including parents) if it
  does not exist yet.

  Args:
      pdf_path (str | Path): Path to the source PDF.
      page_number (int): 1-based page number to extract.
      output_dir (str | Path): Directory the extracted page is written to.

  Returns:
      Path: Location of the newly written single-page PDF.

  Raises:
      ValueError: If ``page_number`` is not within ``1..page_count`` of the
          source document. Nothing is written in that case.
  """

  pdf_path = Path(pdf_path)
  output_dir = Path(output_dir)

  # Open source PDF; the try/finally guarantees the handle is released even
  # when validation or saving fails (an open handle keeps the file locked
  # on Windows).
  src = fitz.open(pdf_path)
  try:
    if not (1 <= page_number <= src.page_count):
      # \u2013 is an en dash, written as an escape so the message survives
      # any re-encoding of this file.
      raise ValueError(f"Page number out of range: 1\u2013{src.page_count}")

    # Only touch the filesystem once the request is known to be valid.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Build output filename
    out_path = output_dir / f"{pdf_path.stem}--{page_number}.pdf"

    # Create new PDF holding just the requested page (insert_pdf is 0-based)
    dst = fitz.open()
    try:
      page_index = page_number - 1
      dst.insert_pdf(src, from_page=page_index, to_page=page_index)
      dst.save(out_path)
    finally:
      dst.close()
  finally:
    src.close()

  return out_path

In [3]:
# Demo run: extract page 1 of one poster PDF. No output_dir is passed, so the
# function's default directory is used; the returned path is the cell output.
extract_single_page_pdf(
    pdf_path=r"./data/poster-pdfs/ocr-needed/49_LGS_Art_Poster_New.pdf",
    page_number=1,
)

WindowsPath('data/test/49_LGS_Art_Poster_New--1.pdf')