In [1]:
# Development convenience: force a fresh import of the local helper module so
# that edits made to cookbook_lib.py are picked up without restarting the kernel.
import importlib
import cookbook_lib

# Re-execute the module's code in place; the star-import below must come AFTER
# this call so the notebook namespace is rebound to the reloaded definitions.
importlib.reload(cookbook_lib)
from cookbook_lib import *

In [2]:
import os
from cookbook_lib import *  # make sure export_master_html_site is in cookbook_lib
from collections import defaultdict

# Batch-process every PDF cookbook in `input_dir`:
#   1. split each book into per-recipe files and a per-book HTML site,
#   2. accumulate headings and ingredient indexes across all books,
#   3. export one merged master HTML site plus global TOC / index files.
# All helpers (load_pdf, detect_headings, split_recipes, export_to_html,
# build_ingredient_index, export_master_html_site, generate_toc, save_index)
# come from the star-import of cookbook_lib.

input_dir = "pdfs"
output_base = "output"

# Containers for the merged master HTML site
all_docs = []  # (doc, headings, source_name)
all_headings_flat = []  # [(title, source_name)]
all_indexes_flat = defaultdict(set)

# Containers for the text versions of the TOC and ingredient index
all_headings = []
ingredient_index_combined = defaultdict(set)

# os.listdir() returns entries in arbitrary order; sort them so the TOC and
# master-site ordering are reproducible across runs and platforms.
pdf_filenames = sorted(
    name for name in os.listdir(input_dir) if name.lower().endswith(".pdf")
)

for filename in pdf_filenames:
    # File name without its extension, used to label the per-book output folders.
    book_name = os.path.splitext(filename)[0]

    pdf_path = os.path.join(input_dir, filename)
    doc = load_pdf(pdf_path)
    headings = detect_headings(doc)

    recipe_dir = os.path.join(output_base, f"Split_{book_name}")
    html_dir = os.path.join(output_base, f"site_{book_name}")

    print(split_recipes(doc, headings, recipe_dir))
    # NOTE(review): the third argument is an empty dict here, whereas the master
    # export below receives the real ingredient index in that position —
    # presumably the per-book sites are built without an index; confirm intent.
    print(export_to_html(doc, headings, {}, html_dir))

    # For file-specific TOC: tag every heading with the book it came from
    all_headings.extend(
        [(f"{title} ({filename})", page) for title, page in headings]
    )

    # For global HTML site
    all_docs.append((doc, headings, filename))
    all_headings_flat.extend([(title, filename) for title, _ in headings])

    # Merge this book's ingredient index into both global accumulators in a
    # single pass (they receive identical data; both names are kept in case
    # later cells reference either one).
    ingredient_index = build_ingredient_index(doc, headings)
    for ingredient, titles in ingredient_index.items():
        ingredient_index_combined[ingredient].update(titles)
        all_indexes_flat[ingredient].update(titles)

# Export the master HTML cookbook site
master_html_dir = os.path.join(output_base, "cookbook_site")
print(
    export_master_html_site(
        all_docs, all_headings_flat, all_indexes_flat, master_html_dir
    )
)

# Export global TOC and Index files
print(generate_toc(all_headings, os.path.join(output_base, "TOC.md")))
print(save_index(ingredient_index_combined, os.path.join(output_base, "Index.md")))

Split 19 recipes to: output\Split_Aroy
HTML cookbook created at: output\site_Aroy
Split 17 recipes to: output\Split_Asian Street Food
HTML cookbook created at: output\site_Asian Street Food
Split 15 recipes to: output\Split_Brasiliano 2024
HTML cookbook created at: output\site_Brasiliano 2024
Split 19 recipes to: output\Split_Coming Home
HTML cookbook created at: output\site_Coming Home
Split 20 recipes to: output\Split_Cooking Bliss
HTML cookbook created at: output\site_Cooking Bliss
Split 17 recipes to: output\Split_Cucina di Primavera
HTML cookbook created at: output\site_Cucina di Primavera
Split 18 recipes to: output\Split_Divora L'inverno
HTML cookbook created at: output\site_Divora L'inverno
Split 18 recipes to: output\Split_Do you Remember
HTML cookbook created at: output\site_Do you Remember
Split 23 recipes to: output\Split_Eat Asia
HTML cookbook created at: output\site_Eat Asia
Split 22 recipes to: output\Split_Escape to the South East
HTML cookbook created at: output\site_E