# In [4]:

import os
import re
from fpdf import FPDF
import textwrap

def natural_key(s):
    """Return a sort key that orders embedded digit runs numerically.

    The string is split on runs of decimal digits; digit runs become ints
    and the text between them is lower-cased, so "a2" sorts before "a10"
    and comparison is case-insensitive.

    Args:
        s: The string (e.g. a file name) to build a key for.

    Returns:
        A list alternating between lower-cased text (even positions) and
        ints (odd positions).
    """
    # re.split with a single capturing group alternates its output: even
    # indices hold the text between matches, odd indices hold the captured
    # digit runs. Deciding by position (instead of str.isdigit) keeps the
    # type at each position fixed and avoids calling int() on characters
    # such as superscript digits that isdigit() accepts but int() rejects.
    parts = re.split(r'(\d+)', s)
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]

def parse_srt(path):
    """Read an .srt file and collect its cues as (start, end, text) tuples.

    The file is decoded as UTF-8 with undecodable bytes dropped. Cues are
    separated by blank lines. Within a cue, the timing line is taken from
    the second non-empty line when that line contains an arrow, otherwise
    from the first (cue without an index line). Cues with fewer than two
    non-empty lines, or without a recognisable timing line, are skipped.

    Args:
        path: Path of the .srt file to read.

    Returns:
        A list of (start, end, text) tuples. Timestamps use '.' as the
        millisecond separator; the text lines of a cue are joined with a
        single space.
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
        raw = handle.read().replace('\r\n', '\n')

    cues = []
    for chunk in re.split(r'\n\s*\n', raw.strip()):
        rows = [row.strip() for row in chunk.split('\n') if row.strip()]
        if len(rows) < 2:
            continue

        # Decide once whether the cue starts with an index line.
        arrow_on_second = re.search(r'-->', rows[1]) is not None
        timing_row = rows[1] if arrow_on_second else rows[0]

        match = re.search(r'(\d{2}:\d{2}:\d{2}[,\.]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[,\.]\d{3})', timing_row)
        if match is None:
            continue

        begin = match.group(1).replace(',', '.')
        finish = match.group(2).replace(',', '.')
        # Everything after the timing line is subtitle text.
        body_rows = rows[2:] if arrow_on_second else rows[1:]
        cues.append((begin, finish, ' '.join(body_rows)))
    return cues

def _latin1_safe(text):
    """Return *text* with characters that Latin-1 cannot encode replaced by '?'."""
    return text.encode('latin-1', 'replace').decode('latin-1')


def convert_folder_srt_to_pdf(folder_path, output_pdf, font_size=12, line_height=6, wrap_width_chars=90):
    """Render every .srt file in a folder into one PDF.

    Files are processed in natural (numeric-aware) name order. Each file
    gets a bold heading with its name, followed by one timestamp line and
    one wrapped text paragraph per subtitle entry. Files that yield no
    entries get a "(no valid subtitles found)" note instead.

    Args:
        folder_path: Folder to scan (non-recursively) for ``.srt`` files.
        output_pdf: Path of the PDF file to write.
        font_size: Point size used for subtitle text.
        line_height: Line height in mm for timestamp and text lines.
        wrap_width_chars: Maximum characters per line when pre-wrapping
            subtitle text.

    Raises:
        FileNotFoundError: If the folder contains no ``.srt`` files.
    """
    srt_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.srt')]
    if not srt_files:
        raise FileNotFoundError("No .srt files found in folder: " + folder_path)
    srt_files.sort(key=natural_key)

    pdf = FPDF(unit='mm', format='A4')
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=14, style='B')
    pdf.cell(0, 8, "SRT Collection", ln=True, align='C')
    pdf.ln(4)
    pdf.set_font("Arial", size=10)

    def write_paragraph(content):
        # Some fpdf releases leave the cursor at the right edge after
        # multi_cell; start from the left margin so a full-width cell
        # always has room.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, line_height, content)

    for srt in srt_files:
        path = os.path.join(folder_path, srt)
        entries = parse_srt(path)
        # File heading
        pdf.set_font("Arial", size=12, style='B')
        pdf.set_text_color(0, 0, 0)
        pdf.ln(2)
        # The built-in Arial font is limited to Latin-1; replace anything
        # else (curly quotes, dashes, non-Latin scripts) so rendering does
        # not abort with an encoding error.
        pdf.cell(0, 7, _latin1_safe(srt), ln=True)
        pdf.ln(1)
        if not entries:
            pdf.set_font("Arial", size=10, style='')
            write_paragraph("(no valid subtitles found)")
            continue

        for start, end, text in entries:
            # timestamp line (muted)
            pdf.set_font("Arial", size=9, style='I')
            pdf.set_text_color(80, 80, 80)
            write_paragraph(f"{start} --> {end}")
            # subtitle text (wrapped)
            pdf.set_font("Arial", size=font_size, style='')
            pdf.set_text_color(0, 0, 0)
            # wrap to reasonable length before passing to multi_cell to avoid huge long-word overflow
            wrapped = '\n'.join(textwrap.wrap(_latin1_safe(text), width=wrap_width_chars))
            write_paragraph(wrapped)
            pdf.ln(1)

    # Save
    pdf.output(output_pdf)
    print(f"Saved PDF: {output_pdf}")

if __name__ == "__main__":
    # Folder holding the .srt files; the combined PDF is written into the same folder.
    subs_dir = r"Z:\OneDrive\Gardening_2025\Leadership_Harvard\Rhetoric\Module 8 - Full Video Lectures\SUBS"
    pdf_target = os.path.join(subs_dir, "R - SUBS.pdf")
    convert_folder_srt_to_pdf(subs_dir, pdf_target)

# Output: Saved PDF: Z:\OneDrive\Gardening_2025\Leadership_Harvard\Rhetoric\Module 8 - Full Video Lectures\SUBS\R - SUBS.pdf
