In [None]:
import os
import re
import time
import openai
from dotenv import load_dotenv
from gutenbergpy.textget import get_text_by_id

# Load environment variables and set OpenAI API key
load_dotenv()
# Use the placeholder only as a fallback. Assigning it unconditionally would
# overwrite a real key already present in the environment (for example one
# that load_dotenv() just read from a .env file).
os.environ.setdefault("OPENAI_API_KEY", "YOUR API KEY")
openai.api_key = os.getenv("OPENAI_API_KEY")

# User-configurable book settings
# Project Gutenberg ID of the book to fetch (replace as needed)
BOOK_ID = 1533
# Book title (replace as needed)
BOOK_NAME = "Macbeth"
# Output flavour: "retold" or "summary"
VERSION_TYPE = "retold"

# Fetch and clean text from Gutenberg
# The download is decoded as UTF-8 (strict error handling, as before).
raw_text = str(get_text_by_id(BOOK_ID), encoding="utf-8")

# Project Gutenberg boilerplate markers. Both the "THIS" and "THE" wordings
# are accepted. The start pattern consumes the rest of its line so the title
# and closing asterisks that follow "EBOOK" do not leak into the body.
_GUTENBERG_START_RE = re.compile(
    r"\*\*\*\s*START OF (?:THE|THIS) PROJECT GUTENBERG EBOOK[^\r\n]*",
    re.IGNORECASE,
)
_GUTENBERG_END_RE = re.compile(
    r"\*\*\*\s*END OF (?:THE|THIS) PROJECT GUTENBERG EBOOK",
    re.IGNORECASE,
)


def clean_gutenberg_text(text):
    """Return *text* without the Project Gutenberg header and footer.

    Everything up to and including the ``*** START OF ... ***`` line and
    everything from the ``*** END OF ...`` marker onwards is removed, and the
    remaining body is stripped of surrounding whitespace. If only one marker
    is present, only that side is trimmed.

    Args:
        text: Full text of a Project Gutenberg ebook.

    Returns:
        The body between the markers, or *text* unchanged when neither
        marker is found.
    """
    start_match = _GUTENBERG_START_RE.search(text)
    body_start = start_match.end() if start_match else 0

    # Search for the footer only after the header so an earlier stray
    # occurrence cannot produce an empty or inverted slice.
    end_match = _GUTENBERG_END_RE.search(text, body_start)
    body_end = end_match.start() if end_match else len(text)

    if start_match is None and end_match is None:
        return text
    return text[body_start:body_end].strip()

# Book text with the Gutenberg boilerplate removed (when its markers are found)
clean_text = clean_gutenberg_text(raw_text)

# General chapter/section/act splitter

# A tighter splitter that only grabs real headings (Acts first, otherwise Chapters, Letters, or bare numerals)
# "Act <numeral>" at the start of a line, in any letter case. The trailing \b
# stops prose such as "Act in haste" from matching as "Act i".
_ACT_HEADING_RE = re.compile(
    r'^(?P<kind>Act)\s+(?P<num>\d+|[IVXLCDM]+)\b\.?',
    flags=re.MULTILINE | re.IGNORECASE,
)

# Fallback headings: "Chapter 1" / "LETTER IV", or a numeral that is alone on
# its line (optionally followed by a period). The bare numeral must fill the
# whole line, otherwise every line starting with I, V, X, L, C, D, M or a
# digit ("In the morning...", "Dear sir") would be taken for a heading.
_FALLBACK_HEADING_RE = re.compile(
    r'^(?:(?P<kind>Chapter|CHAPTER|Letter|LETTER)\s+(?P<num>\d+|[IVXLCDM]+)\b\.?'
    r'|(?P<bare>\d+|[IVXLCDM]+)\.?[ \t]*\r?$)',
    flags=re.MULTILINE,
)


def _heading_title(match):
    """Build the dictionary key for one heading match."""
    bare = match.groupdict().get('bare')
    if bare is not None:
        # standalone numeral
        return f"Section {bare}"
    # Capitalise the keyword only. str.title() on the whole heading would
    # turn "ACT II" into "Act Ii".
    return f"{match.group('kind').capitalize()} {match.group('num').upper()}"


def smart_split_book(text):
    """Split *text* into sections keyed by their heading.

    Act headings are tried first. If none are found, the text is split on
    Chapter/Letter headings and on numerals that stand alone on a line.

    Args:
        text: The book text to split.

    Returns:
        A dict mapping a normalised heading ("Act II", "Chapter 3",
        "Letter IV", "Section 1") to the stripped text between that heading
        and the next one. Text before the first heading is not included.
        If a heading occurs more than once, the last occurrence's text is
        kept. The dict is empty when no heading is found.
    """
    matches = list(_ACT_HEADING_RE.finditer(text))
    if not matches:
        matches = list(_FALLBACK_HEADING_RE.finditer(text))

    parts = {}
    for i, m in enumerate(matches):
        # A section runs from the end of its heading to the next heading.
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        parts[_heading_title(m)] = text[m.end():end].strip()
    return parts



# Split the cleaned text into titled sections; main() reads this module-level dict.
chapters = smart_split_book(clean_text)
# Report how many sections were detected for the configured book.
print(f"Detected {len(chapters)} sections for '{BOOK_NAME}' (ID: {BOOK_ID}).")

# Split large text into manageable chunks

# Sentence-ending punctuation, optionally followed by closing quotes or
# brackets, that is immediately followed by whitespace (space or line break).
_SENTENCE_END_RE = re.compile(r"""[.!?]["'”’)\]]*(?=\s)""")
_WHITESPACE_RE = re.compile(r"\s")


def _find_chunk_cut(window):
    """Return the index (>= 1) at which *window* should be cut.

    Preference order: after the last sentence end, before the last
    whitespace character, and finally a hard cut at the end of the window.
    """
    cut = 0
    for m in _SENTENCE_END_RE.finditer(window):
        cut = m.end()
    if cut:
        return cut
    for m in _WHITESPACE_RE.finditer(window):
        cut = m.start()
    # cut == 0 means no usable whitespace; fall back to the full window.
    return cut or len(window)


def split_text_into_chunks(text, max_size=6000):
    """Split *text* into chunks of at most *max_size* characters.

    Chunks end at a sentence boundary when one exists inside the window,
    otherwise at a word boundary, otherwise at exactly *max_size*
    characters. Each chunk produced by a split is stripped of surrounding
    whitespace and empty chunks are dropped.

    Args:
        text: The text to split.
        max_size: Maximum chunk length in characters; must be at least 1.

    Returns:
        A list of chunks in their original order. Text that already fits is
        returned as a single chunk, and empty text gives an empty list.

    Raises:
        ValueError: If *max_size* is less than 1.
    """
    if max_size < 1:
        raise ValueError("max_size must be a positive integer")

    chunks = []
    while len(text) > max_size:
        # Only look inside the first max_size characters so the chunk can
        # never exceed the limit.
        cut = _find_chunk_cut(text[:max_size])
        piece = text[:cut].strip()
        if piece:
            chunks.append(piece)
        text = text[cut:].strip()
    if text:
        chunks.append(text)
    return chunks

# Generate via OpenAI API

def generate_content(prompt, model="gpt-4o", temperature=0.7):
    """Send *prompt* to the OpenAI chat completions API and return the reply.

    Args:
        prompt: The user message to send.
        model: Name of the chat model to use.
        temperature: Sampling temperature passed to the API.

    Returns:
        The text of the first choice with surrounding whitespace removed, or
        an empty string when the response carries no text content.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a fun, engaging audiobook narrator."},
            {"role": "user",   "content": prompt}
        ],
        temperature=temperature
    )
    # The message content is optional in the API response. Guard against
    # None so a content-less reply does not raise AttributeError on .strip().
    content = response.choices[0].message.content
    return (content or "").strip()

# Main assembly

def create_audiobook(book_name, chapters, version_type):
    """Assemble the full audiobook script for one book.

    The script consists of a generated introduction, one generated passage
    per chunk of every section, and a generated epilogue. Progress and the
    total elapsed time are printed along the way.

    Args:
        book_name: Title inserted into the introduction and epilogue prompts.
        chapters: Mapping of section title to section text.
        version_type: 'summary' (case-insensitive) requests summaries; any
            other value requests a full retelling.

    Returns:
        All script pieces joined by blank lines.
    """
    started_at = time.time()
    pause = "<break time=1.5s/>"

    def build_prompt(heading, passage):
        # Anything other than 'summary' is treated as a retelling.
        if version_type.lower() != 'summary':
            return (
                f"Retell the section titled “{heading}” as a fully immersive, fun, modern third-person narrative. "
                "Do NOT compress or summarize—cover every event and detail in sequence. "
                "Include original dialogue in quotes. Aim for at least 800 words so it feels like a true audiobook script.\n\n"
                f"{passage}\n\n"
            )
        return (
            f"Summarize the section titled “{heading}” in fun, modern third-person style—about 500 words. "
            "Keep it vivid, emotional, and engaging.\n\n"
            f"{passage}\n\n"
        )

    # Opening: AI-written introduction
    opening_request = (
        f"Write an engaging introduction for the audiobook of '{book_name}' in third person. "
        "Ask the audience to like, share, and subscribe."
    )
    pieces = [generate_content(opening_request)]

    # Body: one generated passage per chunk of each section
    for title, body in chapters.items():
        print(f"Processing {title}…")
        for part_no, passage in enumerate(split_text_into_chunks(body), start=1):
            # Only the second and later chunks carry a "(Part N)" suffix.
            heading = f"{title} (Part {part_no})" if part_no > 1 else title
            narration = generate_content(build_prompt(heading, passage))
            pieces.append("\n".join([heading, pause, narration, pause]))

    # Closing: AI-written epilogue
    closing_request = (
        f"Write an engaging epilogue for the audiobook of '{book_name}' in third person. "
        "Ask the audience to like, share, subscribe, and comment below what book they want uploaded next."
    )
    pieces.append(generate_content(closing_request))

    minutes = (time.time() - started_at) / 60
    print(f"Completed in {minutes:.1f} minutes.")
    return "\n\n".join(pieces)

# Runner

def main():
    """Generate the audiobook script and write it to a text file.

    Prompts for a title when BOOK_NAME is empty, builds the script from the
    module-level ``chapters`` and ``VERSION_TYPE``, and saves it as
    ``<title>_<version>.txt`` in the current directory.
    """
    global BOOK_NAME
    if not BOOK_NAME:
        BOOK_NAME = input("Enter the book title: ")
    audiobook_script = create_audiobook(BOOK_NAME, chapters, VERSION_TYPE)
    # Replace path separators and other characters that are not valid in
    # filenames. Otherwise a title such as "Romeo/Juliet" makes open() fail
    # after the whole script has already been generated.
    safe_name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', BOOK_NAME.replace(' ', '_'))
    filename = f"{safe_name}_{VERSION_TYPE}.txt"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(audiobook_script)
    print(f"Audiobook script saved to {filename}")

# Call main() only when this file is executed directly.
if __name__ == "__main__":
    main()


Detected 5 sections for 'Macbeth' (ID: 1533).
Processing Act I…
Processing Act Ii…
Processing Act Iii…
Processing Act Iv…
Processing Act V…
Completed in 8.7 minutes.
Audiobook script saved to Macbeth_retold.txt
