# In [6]:
from PyPDF2 import PdfReader, PdfWriter


def filter_pdf_pages(input_pdf, output_pdf, page_numbers):
    """
    Copy selected pages of one PDF into a newly written PDF.

    Pages are appended in the order they appear in ``page_numbers``; a
    number that is listed more than once is added each time. Numbers that
    fall outside the document's page range are skipped and a warning is
    printed to stdout. The output file is written even when no page was
    selected.

    Args:
        input_pdf (str): Path to the PDF to read pages from.
        output_pdf (str): Path of the PDF to create (opened in "wb" mode,
            so an existing file is overwritten).
        page_numbers (list): 1-based page numbers to copy.
    """
    source = PdfReader(input_pdf)
    selection = PdfWriter()

    for requested in page_numbers:
        # Guard clause: report and skip anything outside 1..page count.
        if not 1 <= requested <= len(source.pages):
            print(f"Warning: Page {requested} does not exist in the PDF.")
            continue

        # Callers count from 1, the reader's page sequence counts from 0.
        selection.add_page(source.pages[requested - 1])

    with open(output_pdf, "wb") as sink:
        selection.write(sink)


if __name__ == "__main__":
    # --- T10 selection -----------------------------------------------------
    # 1-based page numbers, listed in the order they should appear in the
    # output file.
    # NOTE(review): the T10 values below are defined but never passed to
    # filter_pdf_pages in the visible code; only the T09 job is run. Confirm
    # whether a T10 call is missing or was removed on purpose.
    input_pdf_path_T10 = "../data/T10.pdf"
    output_pdf_path_T10 = "../data/T10_select.pdf"
    pages_to_extract_T10 = [
        571, 572, 869, 384, 385, 569, 570, 894,
        895, 910, 728, 729, 889, 566, 567, 386,
        387, 376, 377, 370, 371, 873, 904, 872,
        357, 358, 378, 379, 561, 870, 871, 911,
        912, 731, 732, 558, 359, 360, 712, 730,
        913, 914, 354, 355, 922, 923, 382, 383,
        908, 909, 573, 574,
    ]

    # --- T09 selection -----------------------------------------------------
    # 1-based page numbers, listed in the order they should appear in the
    # output file. Page 965 occurs twice in this list, so it is written twice.
    input_pdf_path_T09 = "../data/T09.pdf"
    output_pdf_path_T09 = "../data/T09_select.pdf"
    pages_to_extract_T09 = [
        527, 528, 525, 526, 523, 524, 935, 938,
        507, 508, 700, 701, 509, 510, 511, 512,
        973, 963, 964, 967, 965, 465, 466, 741,
        742, 710, 711, 746, 747, 485, 642, 643,
        486, 487, 475, 476, 702, 703, 716, 717,
        980, 981, 467, 468, 646, 473, 474, 651,
        652, 738, 739, 732, 733, 977, 922, 923,
        1000, 1001, 927, 928, 990, 991, 519, 520,
        965, 966, 488, 489, 708, 709, 522, 515,
        516, 503, 504, 529, 530, 998, 999, 988,
        989, 972, 492, 493, 490, 491, 498, 499,
        563, 654, 730, 731, 971, 649, 933, 934,
        949,
    ]

    # Run the T09 extraction and report where the result was written.
    filter_pdf_pages(
        input_pdf=input_pdf_path_T09,
        output_pdf=output_pdf_path_T09,
        page_numbers=pages_to_extract_T09,
    )
    print(f"Filtered PDF saved to {output_pdf_path_T09}")

# Output: Filtered PDF saved to ../data/T09_select.pdf
