<a href="https://colab.research.google.com/github/paintedpotato/pdf-to-pptx/blob/main/Pdf_to_pptx_converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install PyPDF2 python-pptx

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading XlsxWriter-3.2.0-py3-none-any.whl.metadata (2.6 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, PyPDF2, python-pptx
Successfully installed PyPDF2-3.0.1 XlsxWriter-3.2.0 python-pptx-1.0.2


In [4]:
import re
import logging
import PyPDF2
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor

# Setup logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Function to create a slide with the verse
def add_verse_slide(prs, book_name, chapter, verse, text):
    logging.debug(f"Adding slide for {book_name} {chapter}:{verse}")
    slide_layout = prs.slide_layouts[5]  # Using a blank slide layout
    slide = prs.slides.add_slide(slide_layout)

    # Set background to navy blue
    slide.background.fill.solid()
    slide.background.fill.fore_color.rgb = RGBColor(0, 0, 128)  # Navy blue

    # Add title with book name, chapter, and verse
    title = slide.shapes.title or slide.shapes.add_textbox(Inches(0), Inches(0), Inches(10), Inches(1))
    title_tf = title.text_frame
    title_text = f"{book_name} {chapter}:{verse}"
    title_tf.text = title_text

    # Set title font and alignment
    title_paragraph = title_tf.paragraphs[0]
    title_paragraph.font.size = Pt(24)
    title_paragraph.font.bold = True
    title_paragraph.font.color.rgb = RGBColor(255, 255, 255)  # White text
    title_tf.paragraphs[0].alignment = PP_ALIGN.CENTER

    # Add verse text to slide
    text_box = slide.shapes.add_textbox(Inches(0.5), Inches(1.5), Inches(9), Inches(5))
    text_frame = text_box.text_frame
    text_frame.word_wrap = True
    p = text_frame.add_paragraph()
    p.text = text

    # Set text font and alignment
    p.font.size = Pt(32)  # Larger font for verse text
    p.font.color.rgb = RGBColor(255, 255, 255)  # White text
    p.alignment = PP_ALIGN.CENTER

# Function to process PDF and extract text
def extract_text_from_pdf(pdf_file_path):
    logging.debug(f"Extracting text from {pdf_file_path}")
    try:
        with open(pdf_file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(reader.pages)):
                logging.debug(f"Extracting text from page {page_num + 1}")
                text += reader.pages[page_num].extract_text()
            return text
    except Exception as e:
        logging.error(f"Failed to extract text from PDF: {e}")
        return ""

# Function to process extracted text into chapters and verses
def process_scripture_text(text):
    logging.debug("Processing scripture text to extract chapters and verses")
    # Regular expression to find chapter and verse numbers (assuming 'Chapter:Verse' format)
    pattern = r"(\d+):(\d+)"  # Matching "Chapter:Verse"
    scripture_dict = {}
    current_chapter = None

    for line in text.splitlines():
        match = re.match(pattern, line)
        if match:
            current_chapter, verse = match.groups()
            logging.debug(f"Found chapter {current_chapter}, verse {verse}")
            scripture_dict[(current_chapter, verse)] = ""  # Initialize an empty string for the verse content
        elif current_chapter:
            scripture_dict[(current_chapter, verse)] += line.strip() + " "  # Append verse text
    return scripture_dict

# Main function to convert scripture PDF to PPTX
def convert_pdf_to_pptx(pdf_file_path, pptx_file_path, book_name):
    logging.debug(f"Starting conversion of {pdf_file_path} to {pptx_file_path}")

    # Extract text from the PDF
    text = extract_text_from_pdf(pdf_file_path)
    if not text:
        logging.error("No text extracted from the PDF.")
        return

    # Process the extracted text into chapters and verses
    verses_dict = process_scripture_text(text)
    if not verses_dict:
        logging.error("No chapters or verses found in the text.")
        return

    # Create PowerPoint presentation
    prs = Presentation()

    # Add slides for each verse
    for (chapter, verse), verse_text in verses_dict.items():
        logging.debug(f"Adding verse {chapter}:{verse}")
        add_verse_slide(prs, book_name, chapter, verse, verse_text)

    # Save the PowerPoint presentation
    logging.debug(f"Saving presentation to {pptx_file_path}")
    prs.save(pptx_file_path)
    logging.info(f"Presentation saved successfully at {pptx_file_path}")


# Example usage
pdf_file_path = 'Book_of_Enoch.pdf'  # Path to your scripture PDF file
pptx_file_path = '1_Enoch.pptx'  # Output path for the PowerPoint
book_name = '1 Enoch'  # Example book name

convert_pdf_to_pptx(pdf_file_path, pptx_file_path, book_name)


ERROR:root:No chapters or verses found in the text.
