In [22]:
import re

def _read_paragraphs(path):
    """Return the non-blank lines of the UTF-8 text file at ``path``, stripped."""
    with open(path, 'r', encoding='utf-8') as text_file:
        return [line.strip() for line in text_file if line.strip()]


def _parse_metadata(lines):
    """Parse ``Key: value`` header lines into a dict.

    Raises:
        ValueError: if a line contains no colon at all.
    """
    metadata = {}
    for line in lines:
        key, separator, value = line.partition(":")
        if not separator:
            raise ValueError(f"Malformed metadata line (expected 'Key: value'): {line!r}")
        metadata[key.strip()] = value.strip()
    return metadata


def _unique_anchor(title, used_ids):
    """Derive an HTML-safe, document-unique id from a heading title.

    Runs of characters other than word characters and hyphens are collapsed
    into a single hyphen; a numeric suffix is appended when the slug was
    already handed out. The chosen id is recorded in ``used_ids``.
    """
    slug = re.sub(r"[^\w-]+", "-", title.lower()).strip("-") or "section"
    candidate, suffix = slug, 2
    while candidate in used_ids:
        candidate = f"{slug}-{suffix}"
        suffix += 1
    used_ids.add(candidate)
    return candidate


def create_html_pairs(latin_path, spanish_path, output_path):
    """Build a two-column HTML page from parallel Latin and Spanish text files.

    Blank lines are ignored in both files. The first three remaining lines of
    each file are treated as a metadata header; ``Title``, ``Author`` and
    ``Translator`` are read from the Latin header only. The remaining lines
    are paired positionally and converted as follows (a heading rule applies
    only when BOTH lines of a pair carry the marker):

    * ``# ``  -> ``<h1>``; the Latin text also replaces the document title
    * ``###`` -> ``<h3>``; the Latin heading gets an ``id`` and a TOC entry
    * ``##``  -> ``<h2>``
    * anything else -> ``<p>`` after ``format_text``

    The result is assembled with ``generate_content`` and written with
    ``write_html``.

    Args:
        latin_path: path of the Latin source text.
        spanish_path: path of the Spanish source text.
        output_path: path of the HTML file to write.

    Raises:
        ValueError: if one of the Latin metadata lines contains no colon.
    """
    import warnings  # local import: only needed for the length-mismatch notice

    metadata_lines = 3  # Assuming first 3 lines are metadata

    latin_lines = _read_paragraphs(latin_path)
    spanish_lines = _read_paragraphs(spanish_path)

    metadata = _parse_metadata(latin_lines[:metadata_lines])
    document_title = metadata.get('Title', '')
    author = metadata.get('Author', '')
    translator = metadata.get('Translator', '')

    # Skip the header in both files (the Spanish header is skipped by count
    # only, assuming it has the same structure as the Latin one).
    latin_body = latin_lines[metadata_lines:]
    spanish_body = spanish_lines[metadata_lines:]

    if len(latin_body) != len(spanish_body):
        # zip() below stops at the shorter list; make the dropped tail visible.
        warnings.warn(
            f"Latin text has {len(latin_body)} paragraphs but Spanish text has "
            f"{len(spanish_body)}; unmatched trailing paragraphs are omitted.",
            stacklevel=2,
        )

    pairs = []  # tuples of (Latin HTML fragment, Spanish HTML fragment)
    toc_items = []
    used_ids = set()

    for latin_line, spanish_line in zip(latin_body, spanish_body):
        if latin_line.startswith('# ') and spanish_line.startswith('# '):
            latin_heading = latin_line.strip('# ').strip()
            spanish_heading = spanish_line.strip('# ').strip()
            document_title = latin_heading
            pairs.append((f"<h1>{latin_heading}</h1>", f"<h1>{spanish_heading}</h1>"))
        elif latin_line.startswith('###') and spanish_line.startswith('###'):
            latin_heading = latin_line.strip('# ').strip()
            spanish_heading = spanish_line.strip('# ').strip()
            # The raw title may contain spaces or quotes, which are not valid
            # inside an id/href attribute value, so link through a slug.
            anchor = _unique_anchor(latin_heading, used_ids)
            toc_items.append(f"<li><a href='#{anchor}'>{latin_heading}</a></li>")
            pairs.append((f"<h3 id='{anchor}'>{latin_heading}</h3>", f"<h3>{spanish_heading}</h3>"))
        elif latin_line.startswith('##') and spanish_line.startswith('##'):
            latin_heading = latin_line.strip('# ').strip()
            spanish_heading = spanish_line.strip('# ').strip()
            pairs.append((f"<h2>{latin_heading}</h2>", f"<h2>{spanish_heading}</h2>"))
        else:
            # Ordinary paragraph: apply bold/italic formatting.
            pairs.append((f"<p>{format_text(latin_line)}</p>", f"<p>{format_text(spanish_line)}</p>"))

    toc = "<ul>" + "".join(toc_items) + "</ul>"

    content = generate_content(pairs)
    write_html(output_path, document_title, toc, content, author, translator)

def format_text(text):
    """Convert asterisk emphasis markers in ``text`` to HTML tags.

    ``***x***`` becomes ``<strong><em>x</em></strong>``, ``**x**`` becomes
    ``<strong>x</strong>`` and ``*x*`` becomes ``<em>x</em>``. The emphasised
    content must start and end with a character that is neither whitespace
    nor an asterisk, so stray markers such as ``2 * 3 * 4`` or a ``***``
    separator line are left untouched instead of producing empty tags.

    Args:
        text: a single paragraph of plain text.

    Returns:
        The text with emphasis markers replaced by HTML tags.
    """
    # Longest marker first, so that "***x***" is not half-consumed by the
    # bold rule and closed in the wrong order by the italic rule.
    text = re.sub(r"\*\*\*(?=[^\s*])(.+?)(?<=[^\s*])\*\*\*", r"<strong><em>\1</em></strong>", text)
    text = re.sub(r"\*\*(?=[^\s*])(.+?)(?<=[^\s*])\*\*", r"<strong>\1</strong>", text)
    text = re.sub(r"\*(?=[^\s*])(.+?)(?<=[^\s*])\*", r"<em>\1</em>", text)
    return text

def generate_content(pairs):
    """Render each (Latin, Spanish) HTML pair as a two-column row.

    Every pair becomes one ``<div class="row">`` holding two
    ``<div class="column">`` cells; rows are joined with newlines.
    """
    rows = []
    for left_html, right_html in pairs:
        rows.append(
            f'<div class="row"><div class="column">{left_html}</div><div class="column">{right_html}</div></div>'
        )
    return '\n'.join(rows)
        
def write_html(output_path, document_title, toc, content, author, translator, template_path='template.html'):
    """Fill the HTML template and write the result to ``output_path``.

    The template is read as UTF-8 and rendered with ``str.format``, so it must
    use the placeholders ``{document_title}``, ``{toc}``, ``{content}``,
    ``{author}`` and ``{translator}``; any other literal brace in the template
    has to be doubled (``{{`` / ``}}``).

    Args:
        output_path: path of the HTML file to create or overwrite.
        document_title: text for the ``{document_title}`` placeholder.
        toc: HTML for the ``{toc}`` placeholder.
        content: HTML for the ``{content}`` placeholder.
        author: text for the ``{author}`` placeholder.
        translator: text for the ``{translator}`` placeholder.
        template_path: path of the template file. Defaults to
            ``'template.html'``, resolved against the current working
            directory.
    """
    with open(template_path, 'r', encoding='utf-8') as template_file:
        template = template_file.read()

    # Keyword names must match the placeholders in the HTML template.
    html_output = template.format(
        document_title=document_title,
        toc=toc,
        content=content,
        author=author,
        translator=translator,
    )

    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write(html_output)


In [23]:
# Example usage: convert one Latin/Spanish text pair into an HTML page.

works_path = "/Users/julioalonzom/Desktop/coding-projects/lumen-scholasticum/authors/Aquinas/"
output_path = "/Users/julioalonzom/Desktop/coding-projects/lumen-scholasticum/website/"

# Both source texts are read from the same directory.
latin_path, spanish_path = (
    works_path + file_name
    for file_name in ('derationibusfidei_lt.txt', 'derationibusfidei_es.txt')
)
# Turn the output directory into the full path of the generated page.
output_path += 'derationibusfidei.html'

create_html_pairs(latin_path, spanish_path, output_path)