In [None]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

# Fetch the cookbook overview page.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the run on an HTTP error instead of silently
# parsing an error page and reporting zero cookbooks.
base_url = "https://www.ddvculinary.com"
start_url = f"{base_url}/freecookbooks.php"
response = requests.get(start_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Create folder for saving PDFs (no error if it already exists)
save_folder = "pdfs"
os.makedirs(save_folder, exist_ok=True)

# Collect the absolute URL of every cookbook detail page: anchors whose href
# ends with "-cb.php". dict.fromkeys() removes repeated links while keeping
# the order in which they first appear on the page, so a cookbook linked more
# than once is only counted (and later downloaded) once.
cookbook_pages = list(
    dict.fromkeys(
        urljoin(start_url, a["href"])
        for a in soup.find_all("a", href=True)
        if a["href"].endswith("-cb.php")
    )
)

print(f"Found {len(cookbook_pages)} cookbook detail pages.")

# Visit each cookbook page and download the first PDF it links to.
# The loop is best-effort: a failure on one page is reported (with the page
# URL so it can be retried) and the loop moves on to the next page.
for page_url in cookbook_pages:
    try:
        page_response = requests.get(page_url, timeout=30)
        # Do not scrape an HTTP error page for links.
        page_response.raise_for_status()
        page_soup = BeautifulSoup(page_response.text, "html.parser")

        # First anchor whose href mentions ".pdf" (case-insensitive), resolved
        # against the page URL; None when the page has no such link.
        pdf_url = next(
            (
                urljoin(page_url, a["href"])
                for a in page_soup.find_all("a", href=True)
                if ".pdf" in a["href"].lower()
            ),
            None,
        )

        if not pdf_url:
            print(f"No PDF found on this page: {page_url}")
            continue

        # Drop any query string or fragment before taking the last path segment.
        filename = os.path.basename(pdf_url.split("?")[0].split("#")[0])
        if not filename:
            print(f"Could not derive a file name from {pdf_url} (page: {page_url})")
            continue

        # Download the PDF. raise_for_status() prevents a 404/500 HTML body
        # from being written to disk under a .pdf name.
        pdf_response = requests.get(pdf_url, timeout=120)
        pdf_response.raise_for_status()

        save_path = os.path.join(save_folder, filename)
        with open(save_path, "wb") as f:
            f.write(pdf_response.content)

    except Exception as e:
        # Deliberately broad so one bad page cannot abort the whole crawl.
        print(f"Error on {page_url}: {e}")





Found 217 cookbook detail pages.


In [18]:
!pip uninstall -y PyPDF2
!pip install pypdf

import os
from pypdf import PdfReader, PdfWriter

# Input and output folders
input_folder = "pdfs"
output_folder = "merged"
output_file = "All_Cookbooks_Merged.pdf"
os.makedirs(output_folder, exist_ok=True)

# Create a writer instance that accumulates the pages of every input file
writer = PdfWriter()

# All PDF files in the input folder, in sorted (case-sensitive) name order
pdf_files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(".pdf"))

# Merge PDFs using PdfWriter
merged_files = 0
skipped_files = []
for pdf in pdf_files:
    pdf_path = os.path.join(input_folder, pdf)
    print(f"Adding: {pdf}")
    try:
        reader = PdfReader(pdf_path)
        # Load every page before touching the writer: a file that fails while
        # its pages are being read is then skipped as a whole instead of
        # leaving only some of its pages in the merged output.
        pages = list(reader.pages)
        for page in pages:
            writer.add_page(page)
        merged_files += 1
    except Exception as e:
        # Best-effort merge: report the unreadable file and carry on.
        skipped_files.append(pdf)
        print(f"Skipping {pdf} due to error: {e}")

# Write the merged PDF, but only if something was merged, so that an empty
# input folder (or a folder of unreadable files) does not produce a page-less
# output file.
output_path = os.path.join(output_folder, output_file)
if merged_files:
    with open(output_path, "wb") as f_out:
        writer.write(f_out)
    print(f"Merged {merged_files} file(s) into {output_path}; skipped {len(skipped_files)}.")
else:
    print(f"No PDFs could be merged from '{input_folder}'; nothing written.")





Defaulting to user installation because normal site-packages is not writeable
Adding: 10-Creative-Quinoa-Recipes.pdf
Adding: 101-Camping-And-Outdoor-Recipes.pdf
Adding: 101-Pierogi-Filling-Ideas.pdf
Adding: 101-Recipes-For-The-Deep-Fryer.pdf
Adding: 101-Tips-And-Techniques-For-Cooking-Like-A-Chef.pdf
Adding: 20-Easy-International-Recipes.pdf
Adding: 200-Recipes-For-Italian-Dishes.pdf
Adding: 300-Chicken-Recipes.pdf
Adding: 300-Recipes-For-The-Grill.pdf
Adding: 400-Refreshing-Punch-Recipes.pdf
Adding: 5-Vietnamese-Recipes-To-Try-At-Home-From-Vietnam-Tourism-Board.pdf
Adding: A-Dinner-Of-16th-Century-Spain.pdf
Adding: A-Domestic-Cookbook-1866.pdf
Adding: A-Flavour-Of-Scotland.pdf
Adding: A-Little-Book-Of-Japanese-Recipes.pdf
Adding: A-Taste-Of-Italy.pdf
Adding: A-Taste-Of-Mexico.pdf
Adding: A-Taste-Of-Thai-Chicken-Recipes.pdf
Adding: A-Taste-Of-Thai-Coconut-Milk-Recipes.pdf
Adding: A-Taste-Of-Thai-Favorite-Recipes.pdf
Adding: A-Taste-Of-Thai-Rice-Noodle-Dishes.pdf
Adding: A-Taste-Of-Thai

Ignoring wrong pointing object 4 0 (offset 0)
Ignoring wrong pointing object 5 0 (offset 0)


Adding: The-Ultimate-Chicken-Wing-Cookbook.pdf
Adding: Tin-Foil-Cooking-Recipes.pdf
Adding: Top-Secret-Recipes.pdf
Adding: Traditional-Ukrainian-Dishes.pdf
Adding: Traditional-Ukrainian-Recipes.pdf
Adding: University-Of-California-Wine-Book.pdf
