In [3]:
!pip install elevenlabs python-dotenv



In [5]:
import os
from dotenv import load_dotenv

# Read the variables defined in the ElevenLabs-specific dotenv file
load_dotenv('Elevenlabs.env') 

# Look the API key up in the process environment
ELEVENLABS_KEY = os.getenv("ELEVENLABS_API_KEY")

# Report whether a non-empty key was found
print(
    "ElevenLabs API Key loaded successfully."
    if ELEVENLABS_KEY
    else "ERROR: Key not found. Check file placement and name."
)

ElevenLabs API Key loaded successfully.


In [2]:
!pip install pypdf



In [3]:
pip install "elevenlabs[pyaudio]"

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install "elevenlabs[pyaudio]" pypdf python-dotenv


Note: you may need to restart the kernel to use updated packages.


In [6]:
!pip install pyaudio



In [20]:
pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install pandas numpy scikit-learn gtts


Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.4


In [7]:
pip install --upgrade elevenlabs

Collecting elevenlabs
  Downloading elevenlabs-2.24.0-py3-none-any.whl.metadata (9.2 kB)
Downloading elevenlabs-2.24.0-py3-none-any.whl (1.1 MB)
   ---------------------------------------- 0.0/1.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.1 MB ? eta -:--:--
   --------- ------------------------------ 0.3/1.1 MB ? eta -:--:--
   --------- ------------------------------ 0.3/1.1 MB ? eta -:--:--
   ------------------ --------------------- 0.5/1.1 MB 663.4 kB/s eta 0:00:01
   ------------------ --------------------- 0.5/1.1 MB 663.4 kB/s eta 0:00:01
   ------------------ --------------------- 0.5/1.1 MB 663.4 kB/s eta 0:00:01
   ---------------------------- ----------- 0.8/1.1 MB 467.9 kB/s eta 0:00:01
   ---------------------------- ----------- 0.8/1.1 MB 467.9 kB/s eta 0:00:01
   ---------------------------- ----------- 0.8/1.1 MB 467.9 kB/s eta 0:00:01
   ---------------------------- ----------

In [6]:
pip install google-genai




In [2]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import os
import re
from dotenv import load_dotenv 
import threading 
from io import BytesIO
import tempfile 

# --- GEMINI IMPORTS ---
from google import genai
from google.genai.errors import APIError

# --- ElevenLabs & PDF Imports ---
from elevenlabs.client import ElevenLabs 
from elevenlabs import VoiceSettings
from pypdf import PdfReader, PdfWriter 
from pypdf.errors import PdfReadError


# =======================================
# 1. AI & CONFIGURATION SETUP
# =======================================

# Load environment variables from the two dotenv files that hold the API keys.
# 'GeminiAI.env' is loaded second with override=True, so a variable it defines
# replaces a value that is already present in the environment.
load_dotenv('Elevenlabs.env')
load_dotenv('GeminiAI.env', override=True)

ELEVENLABS_KEY = os.getenv("ELEVENLABS_API_KEY")
GEMINI_KEY = os.getenv("GEMINI_API_KEY")

# --- Initialize ElevenLabs Client ---
# The client stays None when the key is missing or the constructor raises.
# The reason is printed instead of being silently discarded, so a later
# "client is not initialized" failure can be traced back to its cause.
ELEVENLABS_CLIENT = None
if ELEVENLABS_KEY:
    try:
        ELEVENLABS_CLIENT = ElevenLabs(api_key=ELEVENLABS_KEY)
    except Exception as e:
        print(f"ElevenLabs Client Initialization Failed: {e}")
else:
    print("ElevenLabs API key not found or improperly formatted.")

# --- Initialize Gemini Client ---
# Same contract as above: None means Gemini-backed features are unavailable.
GEMINI_CLIENT = None
if GEMINI_KEY:
    try:
        GEMINI_CLIENT = genai.Client(api_key=GEMINI_KEY)
    except Exception as e:
        print(f"Gemini Client Initialization Failed: {e}")
else:
    print("Gemini API key not found or improperly formatted.")


# =======================================
# 2. BACKEND LOGIC CLASS
# =======================================

class PDFVoiceBackEnd:
    """Backend that turns a PDF (or a page range of it) into synthesized speech.

    Text is obtained from the PDF, optionally interpreted or summarized with
    Gemini, and converted to audio through the ElevenLabs client.
    """

    # ElevenLabs voice identifiers, keyed by the role they play in this class
    VOICE_IDS = dict(
        male="pNInz6obpgDQGcFmaJgB",
        female="21m00Tcm4TlvDq8ikWAM",
        male_dialogue="VR6AewLTigWG4xSOukaG",
        female_dialogue="EXAVITQu4vr4xnSDxMaL",
    )

    # Model passed to every text-to-speech request
    MODEL_ID = "eleven_multilingual_v2"

    # Voice settings used for story segments
    STORY_SETTINGS = VoiceSettings(
        stability=0.75,
        similarity_boost=0.75,
        style=0.8,
        use_speaker_boost=True,
    )
    # Voice settings used for single-voice reading
    NORMAL_SETTINGS = VoiceSettings(
        stability=0.6,
        similarity_boost=0.7,
        style=0.3,
        use_speaker_boost=True,
    )
    # Voice settings intended for expert material
    EXPERT_SETTINGS = VoiceSettings(
        stability=0.7,
        similarity_boost=0.7,
        style=0.4,
        use_speaker_boost=True,
    )

    def __init__(self, pdf_path, pdf_type, mode, voice_choice, start_page, end_page, root, ui_callback, error_callback):
        """Store one conversion request and translate the UI's PDF-type label.

        Args:
            pdf_path: Path of the PDF to read.
            pdf_type: Label chosen in the UI; mapped to "normal", "story" or
                "expert_panel". Any unrecognized label is treated as "normal".
            mode: Reading mode; the value "Summary" triggers summarization.
            voice_choice: Voice name, looked up case-insensitively in VOICE_IDS.
            start_page: First page to read (string; see the extraction methods).
            end_page: Last page to read (string; see the extraction methods).
            root: Tk root widget used to schedule callbacks and set the title.
            ui_callback: Called with the audio bytes when generation succeeds.
            error_callback: Called without arguments after a fatal error.
        """
        # UI label -> internal type key
        internal_types = {
            "Normal PDF": "normal",
            "Story with dialogues": "story",
            "PDF with graphs and mathematical equations": "expert_panel",
        }

        self.pdf_path = pdf_path
        self.pdf_type = internal_types.get(pdf_type, "normal")
        self.mode = mode
        self.voice_choice = voice_choice
        self.start_page = start_page
        self.end_page = end_page
        self.root = root
        self.ui_callback = ui_callback
        self.error_callback = error_callback

    def run(self):
        """Start the conversion on a worker thread so the Tk event loop stays responsive.

        If the module-level ElevenLabs client was never created, an error
        dialog and ``error_callback`` are scheduled on the Tk event loop and no
        thread is started. Scheduling ``error_callback`` here matches every
        other fatal path of this class, so the caller is always told that the
        request ended.
        """
        if ELEVENLABS_CLIENT is None:
            self.root.after(0, lambda: messagebox.showerror("Error", "ElevenLabs client is not initialized. Cannot generate audio."))
            # Without this the caller never learns that the request failed
            self.root.after(0, self.error_callback)
            return

        worker = threading.Thread(target=self._threaded_run_logic)
        worker.start()

    def _threaded_run_logic(self):
        """Worker-thread pipeline: obtain text, optionally summarize it, then synthesize audio.

        Steps:
          1. For the "expert_panel" type, ask Gemini to interpret the PDF. If the
             Gemini client is missing or the call fails, the type is downgraded
             to "normal".
          2. Extract plain text when step 1 produced nothing or does not apply.
             A failure here is fatal.
          3. In "Summary" mode, replace the text with a Gemini summary. On
             failure the full text is kept and the mode becomes "Full PDF".
          4. Generate audio: tagged multi-voice for "story", single voice otherwise.

        Widgets are never touched directly from this thread; every dialog, title
        change and callback is scheduled through ``root.after``. On success
        ``ui_callback(audio_bytes)`` is scheduled; on a fatal error an error
        dialog and ``error_callback`` are scheduled.
        """

        def post(func, *args):
            # Hand a call over to the Tk event loop
            self.root.after(0, func, *args)

        def set_title(title):
            post(self.root.title, title)

        def show_info(title, message):
            post(messagebox.showinfo, title, message)

        def show_error(title, message):
            # The message is formatted by the caller, inside its except block.
            # Python unbinds the `except ... as e` name when that block ends, so
            # a deferred callback must not try to read the exception later.
            post(messagebox.showerror, title, message)

        text_to_read = ""

        # --- MULTIMODAL PATH (for Expert Panel / Graphs) ---
        if self.pdf_type == "expert_panel":
            if GEMINI_CLIENT is None:
                show_error("Error", "Expert Panel mode requires a valid GEMINI API Key. Reading full PDF text instead.")
                self.pdf_type = "normal"
            else:
                try:
                    set_title("Interpreting Charts & Equations with AI...")
                    show_info("Progress", "Sending PDF to AI for chart interpretation (this may take a moment)...")
                    text_to_read = self._interpret_document_and_extract_text()
                except APIError as e:
                    show_error("AI Interpretation Failed (API Error)", f"AI Interpretation Failed: {e}. Reading full PDF text instead.")
                    self.pdf_type = "normal"
                except Exception as e:
                    show_error("AI Interpretation Failed", f"Processing Error: {e}. Reading full PDF text instead.")
                    self.pdf_type = "normal"

        # --- FALLBACK / TEXT-ONLY EXTRACTION PATH ---
        if not text_to_read or self.pdf_type != "expert_panel":
            try:
                full_text = self._extract_text_from_pdf()
                if not full_text.strip():
                    raise RuntimeError("The extracted text is empty or contains only whitespace.")
                text_to_read = full_text
            except PdfReadError as e:
                show_error("Error", f"PDF Read Error: {e} - The file might be corrupted or encrypted.")
                post(self.error_callback)
                return
            except Exception as e:
                show_error("Error", f"PDF Extraction Error: {e}")
                post(self.error_callback)
                return

        # --- SUMMARY LOGIC (runs after extraction, and only if in Summary mode) ---
        if self.mode == "Summary":
            if GEMINI_CLIENT is None:
                show_error("Error", "Summarization requires a valid GEMINI API Key. Reading full PDF instead.")
                self.mode = "Full PDF"
            else:
                try:
                    set_title("Generating Summary...")
                    show_info("Progress", "Generating summary with AI (this may take a moment)...")
                    text_to_read = self._generate_summary(text_to_read)
                except APIError as e:
                    show_error("AI Summarization Failed (API Error)", f"AI Summarization Failed: {e}. Reading full PDF instead.")
                    self.mode = "Full PDF"
                except Exception as e:
                    show_error("AI Summarization Failed", f"AI Summarization Failed: {e}. Reading full PDF instead.")
                    self.mode = "Full PDF"

        # --- AUDIO GENERATION ---
        try:
            set_title("Generating Audio...")

            if self.pdf_type == "story":
                if GEMINI_CLIENT is None:
                    show_error("Error", "Gemini API is required for reliable dialogue separation. Cannot proceed with Story mode.")
                    post(self.error_callback)
                    return

                set_title("AI Tagging Speakers...")
                # 1. Let Gemini mark who speaks each quoted line
                tagged_text = self._tag_dialogue_speakers(text_to_read)
                # 2. Synthesize the tagged text with one voice per speaker
                audio_data = self._generate_story_audio(tagged_text)
            else:
                # "expert_panel" and "normal" are both read with a single voice
                audio_data = self._generate_single_voice_audio(text_to_read)

            # Deliver the result on the main thread
            post(self.ui_callback, audio_data)

        except RuntimeError as e:
            show_error("Audio Generation Failed (Runtime)", f"Processing Error: {e}")
            post(self.error_callback)
        except Exception as e:
            show_error("Audio Generation Failed (API)", f"ElevenLabs API/Network Error: {e}")
            post(self.error_callback)
    
    # --- Helper Functions (Text Processing) ---

    def _extract_text_from_pdf(self) -> str:
        """Return the text of the selected pages, each page followed by a blank line.

        The page range comes from ``self.start_page`` / ``self.end_page``. It is
        applied only when both are non-empty digit strings, and is clamped to
        ``1 .. number of pages``; otherwise the whole document is read.

        Raises:
            RuntimeError: If no text could be extracted from the selected pages.
        """
        pdf = PdfReader(self.pdf_path)
        page_count = len(pdf.pages)
        first_page, last_page = 1, page_count

        bounds_given = (
            self.start_page
            and self.end_page
            and self.start_page.isdigit()
            and self.end_page.isdigit()
        )
        if bounds_given:
            try:
                first_page = max(1, int(self.start_page))
                last_page = min(page_count, int(self.end_page))
            except ValueError:
                # Keep whatever bounds were established before the failure
                pass

        # Pages use zero-based indices; pages that yield no text are skipped
        page_texts = (
            pdf.pages[index].extract_text()
            for index in range(first_page - 1, last_page)
        )
        collected = "".join(page_text + "\n\n" for page_text in page_texts if page_text)

        if not collected.strip():
            raise RuntimeError("Could not extract text from the selected pages. Is the PDF text-searchable?")
        return collected
    
    # --- Multimodal Interpretation for Expert Panel ---
    def _interpret_document_and_extract_text(self) -> str:
        """Have Gemini read the selected pages, charts and equations included.

        The selected pages are copied into a new PDF that is serialized in
        memory and sent to the Gemini API together with a prompt. The prompt
        asks for charts/graphs to be explained and all other content to be
        transcribed. No temporary file is written to disk.

        The page range comes from ``self.start_page`` / ``self.end_page``. It is
        applied only when both are non-empty digit strings, and is clamped to
        ``1 .. number of pages``; otherwise the whole document is sent.

        Returns:
            The model's text with surrounding whitespace removed.

        Raises:
            RuntimeError: If the page range is empty or the response has no text.
        """
        # 1. Isolate the page range
        reader = PdfReader(self.pdf_path)
        writer = PdfWriter()
        total_pages = len(reader.pages)

        start, end = 1, total_pages
        try:
            if self.start_page and self.end_page and self.start_page.isdigit() and self.end_page.isdigit():
                start = max(1, int(self.start_page))
                end = min(total_pages, int(self.end_page))
        except ValueError:
            pass

        if start > end:
            # Nothing to send; avoid an API call on an empty document
            raise RuntimeError("The selected page range contains no pages.")

        for i in range(start - 1, end):
            writer.add_page(reader.pages[i])

        # 2. Serialize the selected pages straight into memory
        pdf_buffer = BytesIO()
        writer.write(pdf_buffer)
        doc_data = pdf_buffer.getvalue()

        # 3. Create the prompt for the model
        prompt = (
            "Analyze the content of this document which contains technical or expert material, "
            "including prose, mathematical equations, and charts/graphs. For every chart or graph "
            "you encounter, **do not** transcribe the surrounding text. Instead, **explain** "
            "what the visual data shows, including key trends, axes, and conclusions. "
            "For all other text (equations, prose), transcribe it clearly and format it formally. "
            "Your final output should be a coherent, structured text ready for audio conversion."
        )

        # 4. Call the Gemini API with the PDF bytes (multimodal input)
        response = GEMINI_CLIENT.models.generate_content(
            model='gemini-2.5-flash', 
            contents=[
                genai.types.Part.from_bytes(data=doc_data, mime_type='application/pdf'),
                prompt
            ]
        )

        # 5. Fail with a clear error instead of an IndexError/AttributeError
        candidates = response.candidates
        if candidates and candidates[0].content and candidates[0].content.parts:
            interpreted = candidates[0].content.parts[0].text
            if interpreted and interpreted.strip():
                return interpreted.strip()
        raise RuntimeError("Gemini API failed to return an interpretation of the document.")


    def _generate_summary(self, text: str) -> str:
        """Ask Gemini for a formal summary of ``text`` (the prompt asks for at most 300 words).

        Only the first 12000 characters are sent; longer input is cut there and
        a truncation marker is appended.

        Raises:
            RuntimeError: If the response carries no candidate or no content.
        """
        char_budget = 12000
        document = text
        if len(document) > char_budget:
            document = document[:char_budget] + "\n[... Content Truncated for Summarization ...]"

        instructions = (
            "Summarize the following document concisely, clearly, and formally. "
            "Focus on the main arguments, key findings, and conclusion. Do not exceed 300 words. "
            "Document content:\n\n"
        )

        reply = GEMINI_CLIENT.models.generate_content(
            model='gemini-2.5-flash',
            contents=instructions + document,
        )

        # Guard clause first: reject a response without usable content
        if not reply.candidates or not reply.candidates[0].content:
            raise RuntimeError("Gemini API failed to return a valid summary.")
        return reply.candidates[0].content.parts[0].text.strip()
            
    # --- Gemini Speaker Tagging for Dialogue ---
    def _tag_dialogue_speakers(self, text: str) -> str:
        """Ask Gemini to mark the speaker's gender in front of each quoted line.

        The prompt requests the text back unchanged, with ``<MALE_SPEAKS>`` or
        ``<FEMALE_SPEAKS>`` inserted immediately before each dialogue quote
        whose speaker can be identified.

        Returns:
            The tagged text with surrounding whitespace removed.

        Raises:
            RuntimeError: If the Gemini client is missing or the response has
                no usable text.
        """
        if GEMINI_CLIENT is None:
            raise RuntimeError("Gemini Client not initialized. Cannot perform speaker tagging.")

        # The blank line before "Story text:" keeps the story separate from the
        # instructions; without it the two were run together as "preamble.Story text:".
        prompt = (
            "Analyze the following story text. For every line of dialogue enclosed in quotation marks, "
            "determine if the speaker is male or female based on the narration context (e.g., 'he said', 'Ella asked'). "
            "Rewrite the entire text exactly as received, but immediately before each dialogue quote, insert a custom tag: "
            "<MALE_SPEAKS> or <FEMALE_SPEAKS>. Do NOT remove the original quotation marks. "
            "If the speaker is unknown or it is narration, use no tag. Do not include any explanation or preamble."
            f"\n\nStory text:\n\n{text}"
        )

        response = GEMINI_CLIENT.models.generate_content(
            model='gemini-2.5-flash', 
            contents=prompt
        )

        candidates = response.candidates
        if candidates and candidates[0].content and candidates[0].content.parts:
            tagged = candidates[0].content.parts[0].text
            if tagged:
                return tagged.strip()
        raise RuntimeError("Gemini API failed to return tagged text.")


    # --- Generation Logic ---

    def _generate_single_voice_audio(self, text: str) -> bytes:
        """Synthesize ``text`` with the voice named by ``self.voice_choice``.

        The voice name is lower-cased and looked up in ``VOICE_IDS``; a name
        that is not a key there raises ``KeyError``.
        """
        # NOTE(review): NORMAL_SETTINGS is used for every request made here;
        # EXPERT_SETTINGS is defined on the class but not referenced by this
        # method. Confirm that is intended.
        return self._generate_stream_data(
            text=text,
            voice_id=self.VOICE_IDS[self.voice_choice.lower()],
            settings=self.NORMAL_SETTINGS,
        )
        
    def _generate_story_audio(self, text: str) -> bytes:
        """Synthesize a tagged story with a narrator voice and two dialogue voices.

        ``text`` is split into speaker tags (``<MALE_SPEAKS>`` / ``<FEMALE_SPEAKS>``),
        quoted dialogue, and narration. A tag selects the voice used for the
        dialogue that follows and stays in effect until the next tag; the male
        dialogue voice is used until a tag is seen. Narration is always read
        with the 'male' voice. Quote marks are removed before synthesis.

        A segment whose synthesis fails is reported with ``print`` and skipped,
        so one failing segment does not discard the rest of the story.

        Returns:
            The audio bytes of all successful segments, concatenated in order.

        Raises:
            RuntimeError: If no segment produced any audio.
        """
        narration_voice = self.VOICE_IDS['male']
        male_dialogue_voice = self.VOICE_IDS['male_dialogue']
        female_dialogue_voice = self.VOICE_IDS['female_dialogue']

        # Straight double quote plus the curly pair (U+201C / U+201D), written
        # as escapes so matching does not depend on how this file is decoded.
        quote_chars = '"\u201C\u201D'

        # One quoted run: opening quote, one or more non-quote chars, closing quote
        quote_pattern = '[' + quote_chars + '][^' + quote_chars + ']+[' + quote_chars + ']'
        dialogue_re = re.compile(quote_pattern)
        # The capturing group makes re.split keep tags and quotes as segments
        split_pattern = r'(<MALE_SPEAKS>|<FEMALE_SPEAKS>|' + quote_pattern + r')'

        segments = re.split(split_pattern, text)
        full_audio_buffer = BytesIO()

        # Voice for the next dialogue segment; updated by each tag encountered
        current_dialogue_voice = male_dialogue_voice

        for segment in segments:
            trimmed_segment = segment.strip()
            if not trimmed_segment:
                continue

            # Speaker tags only change state; they are never spoken
            if trimmed_segment == "<MALE_SPEAKS>":
                current_dialogue_voice = male_dialogue_voice
                continue
            if trimmed_segment == "<FEMALE_SPEAKS>":
                current_dialogue_voice = female_dialogue_voice
                continue

            if dialogue_re.fullmatch(trimmed_segment):
                # Dialogue: use the voice chosen by the most recent tag, and
                # drop the quote marks plus any padding left inside them
                voice_id = current_dialogue_voice
                text_to_synthesize = trimmed_segment.strip(quote_chars).strip()
            else:
                # Narration
                voice_id = narration_voice
                text_to_synthesize = trimmed_segment

            if not text_to_synthesize:
                continue

            try:
                audio_data = self._generate_stream_data(text=text_to_synthesize, voice_id=voice_id, settings=self.STORY_SETTINGS)
                full_audio_buffer.write(audio_data)
            except Exception as e:
                # Best effort: report the failure (e.g. quota exceeded) and go on
                print(f"Segment generation failed for voice {voice_id}: {e}")

        story_audio = full_audio_buffer.getvalue()
        if not story_audio:
            raise RuntimeError("Story mode failed to generate any audio segments.")

        return story_audio

    def _generate_stream_data(self, text: str, voice_id: str, settings: VoiceSettings) -> bytes:
        """Run one text-to-speech request and return the complete audio as bytes.

        The chunks yielded by the ElevenLabs client are collected in order;
        empty chunks are ignored.

        Raises:
            RuntimeError: If the request yields no audio data at all.
        """
        chunk_stream = ELEVENLABS_CLIENT.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            voice_settings=settings,
            model_id=self.MODEL_ID,
            output_format="mp3_44100_128",
        )

        # Drain the stream, keeping only chunks that actually carry data
        payload = b"".join(chunk for chunk in chunk_stream if chunk)

        if not payload:
            raise RuntimeError(
                f"ElevenLabs API returned empty audio data for voice {voice_id}. Possible causes: character limit exceeded, API key issue, or network timeout."
            )

        return payload

# =======================================
# 3. FRONTEND UI & LOGIC INTEGRATION
# =======================================

# Most recently generated audio; None until a conversion has produced data
audio_data = None

# Colour schemes, keyed by theme name (SLEEK DARK MODE DEFAULT).
# "mode_text" is the toggle-button caption, i.e. it names the theme the button
# switches TO. The emoji are written as escapes so they cannot be corrupted by
# a wrong file encoding.
COLORS = {
    "dark": {
        "bg_main": "#1C1C30",       # Deep Navy/Charcoal
        "bg_box": "#34344A",        # Slightly lighter background for frames/inputs
        "fg_text": "#E0E0FF",       # Light Lavender/White for text
        "accent": "#4CAF50",        # Vibrant Teal/Green Accent
        "btn_start": "#00A07A",     # Primary Start Green
        "btn_start_disabled": "#404055", # Darker disabled state
        "btn_download": "#4CAF50",  # Teal/Green for Download
        "btn_text_light": "white",
        "btn_text_dark": "#E0E0FF",
        "mode_text": "Light Mode \u2600\uFE0F",   # sun emoji
        "font_family": "Segoe UI",
    },
    "light": {
        "bg_main": "#F7F9FF",
        "bg_box": "#EAEFF7",
        "fg_text": "#1C2A3A",
        "accent": "#4A90E2",
        "btn_start": "#00CC99",
        "btn_start_disabled": "#CCCCCC",
        "btn_download": "#4A90E2",
        "btn_text_light": "white",
        "btn_text_dark": "#1C2A3A",
        "mode_text": "Dark Mode \U0001F319",      # crescent moon emoji
        "font_family": "Arial",
    }
}

current_mode = "dark" # Set Dark Mode as default

def apply_theme():
    """Restyle every widget from the palette selected by ``current_mode``.

    Reads the module-level widgets created by the UI setup (``root``, ``main``,
    the frames, entries and buttons) and the ttk ``style`` object. Nothing is
    returned; the widgets are reconfigured in place.

    The generic label pass over ``main`` skips ``title_label`` and
    ``pdf_label``. Both get dedicated styling, which the generic pass would
    otherwise replace with the plain text colour and body font.
    """
    theme = COLORS[current_mode]
    family = theme["font_family"]
    font_main = (family, 11)
    font_title = (family, 24, "bold")
    font_header = (family, 14, "bold")
    font_button = (family, 14, "bold")

    # Root window and its direct frame/label children
    root.config(bg=theme["bg_main"])
    for widget in root.winfo_children():
        if isinstance(widget, (tk.Frame, tk.Label)):
            widget.config(bg=theme["bg_main"])

    # Generic labels of the main frame; section headers get the header font
    for widget in main.winfo_children():
        if not isinstance(widget, tk.Label):
            continue
        if widget is title_label or widget is pdf_label:
            continue  # styled individually below
        label_text = widget.cget("text")
        is_header = "Select PDF" in label_text or "Choose Pages" in label_text
        widget.config(
            bg=theme["bg_main"],
            fg=theme["fg_text"],
            font=font_header if is_header else font_main,
        )

    # Title and selected-file label
    title_label.config(fg=theme["accent"], bg=theme["bg_main"], font=font_title)
    pdf_label.config(fg=theme["fg_text"], bg=theme["bg_main"], font=(family, 10))

    # Page selection radio buttons
    for child in page_frame.winfo_children():
        if isinstance(child, tk.Radiobutton):
            child.config(
                bg=theme["bg_main"],
                fg=theme["fg_text"],
                activebackground=theme["bg_main"],
                selectcolor=theme["bg_box"],
                font=font_main,
                activeforeground=theme["accent"]
            )
    for child in range_frame.winfo_children():
        if isinstance(child, tk.Label):
            child.config(bg=theme["bg_main"], fg=theme["fg_text"], font=font_main)

    # Entry widgets (insertbackground is the text cursor colour)
    for entry in (from_entry, to_entry):
        entry.config(bg=theme["bg_box"], fg=theme["fg_text"], insertbackground=theme["fg_text"], font=font_main)

    # Options frame labels
    for child in options_frame.winfo_children():
        if isinstance(child, tk.Label):
            child.config(bg=theme["bg_main"], fg=theme["fg_text"], font=font_main)

    # Comboboxes (ttk widgets are themed through the shared style object)
    style.configure(
        "TCombobox",
        fieldbackground=theme["bg_box"],
        background=theme["bg_main"],
        foreground=theme["fg_text"],
        arrowcolor=theme["fg_text"],
        selectforeground=theme["fg_text"],
        selectbackground=theme["bg_box"],
        font=font_main
    )
    style.map('TCombobox', fieldbackground=[('readonly', theme["bg_box"])])

    # Buttons (non-ttk)
    browse_button.config(bg=theme["bg_box"], fg=theme["accent"], activebackground=theme["bg_box"], font=(family, 11, "bold"))

    # str() because the option value is not guaranteed to come back as a str
    start_enabled = str(start_button['state']) != 'disabled'
    start_button.config(
        bg=theme["btn_start"] if start_enabled else theme["btn_start_disabled"],
        fg=theme["btn_text_light"],
        activebackground=theme["accent"],
        font=font_button
    )
    download_enabled = str(download_button['state']) != 'disabled'
    download_button.config(
        bg=theme["btn_download"] if download_enabled else theme["btn_start_disabled"],
        fg=theme["btn_text_light"],
        activebackground=theme["accent"],
        font=font_button
    )

    # Mode toggle button
    mode_toggle_button.config(text=theme["mode_text"], bg=theme["bg_box"], fg=theme["fg_text"], font=(family, 10))

def toggle_dark_mode():
    """Flip ``current_mode`` between "dark" and "light", then re-apply the theme."""
    global current_mode
    if current_mode == "dark":
        current_mode = "light"
    else:
        current_mode = "dark"
    apply_theme()


def create_ui():
    """Build the converter window, wire up its callbacks, and run the Tk event loop.

    The widgets are published as module-level globals because ``apply_theme``
    restyles them by name. The function blocks in ``root.mainloop()`` until
    the window is closed.

    Relies on names defined elsewhere in the file: ``COLORS``,
    ``current_mode``, ``apply_theme``, ``toggle_dark_mode``,
    ``PDFVoiceBackEnd``, ``ELEVENLABS_CLIENT`` and ``GEMINI_CLIENT``.
    """
    global root, file_path, pdf_label, page_mode_var, from_entry, to_entry, pdf_type_var, pdf_type_dropdown, summary_var, summary_dropdown, voice_var, voice_dropdown
    global download_button, start_button, audio_data, main, title_label, page_frame, range_frame, options_frame, browse_button, mode_toggle_button
    global style

    # PDF categories for which the voice and content-mode dropdowns are locked.
    fixed_option_categories = ("Story with dialogues", "PDF with graphs and mathematical equations")

    root = tk.Tk()
    root.title("PDF to Voice Converter")
    root.geometry("680x650")
    root.resizable(False, False)

    style = ttk.Style(root)
    style.theme_use('clam')

    main = tk.Frame(root)
    main.pack(pady=30, anchor="w", padx=40)

    def toggle_voice_and_summary(*args):
        """Lock or unlock the voice/content-mode dropdowns for the chosen PDF category.

        Accepts ``*args`` because it is also registered as a variable trace
        callback. Any previously generated audio is discarded.
        """
        locked = pdf_type_var.get() in fixed_option_categories
        dropdown_state = "disabled" if locked else "readonly"

        voice_dropdown.config(state=dropdown_state)
        summary_dropdown.config(state=dropdown_state)
        if locked:
            # Summaries are not offered for these categories.
            summary_var.set("Full PDF")

        reset_audio_controls(reset_start_button=True)

    theme = COLORS[current_mode]

    mode_toggle_button = tk.Button(
        root, text=theme["mode_text"], command=toggle_dark_mode,
        relief="flat", cursor="hand2", font=("Arial", 10), bd=0
    )
    mode_toggle_button.place(x=500, y=30)

    title_label = tk.Label(
        main, text="PDF to Audio Converter",
        font=("Arial", 24, "bold")
    )
    title_label.pack(anchor="w", pady=(0, 30))

    def choose_pdf():
        """Ask the user for a PDF; on selection, remember it and reset the audio controls."""
        global file_path
        new_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf")])
        if new_path:
            file_path = new_path
            display_path = os.path.basename(file_path)
            pdf_label.config(text=f"Selected: {display_path}")
            reset_audio_controls(reset_start_button=True)

    # --- File selection ---
    tk.Label(main, text="Select PDF file:", font=("Arial", 14, "bold")).pack(anchor="w")
    browse_button = tk.Button(main, text="Browse PDF", command=choose_pdf, width=15, font=("Arial", 11), cursor="hand2", relief="flat", bd=2)
    browse_button.pack(anchor="w", pady=5)

    pdf_label = tk.Label(main, text="No file selected", font=("Arial", 10))
    pdf_label.pack(anchor="w", pady=(0, 20))

    # --- Page selection ---
    tk.Label(
        main, text="Choose Pages to Convert:",
        font=("Arial", 14, "bold")
    ).pack(anchor="w")

    page_mode_var = tk.StringVar(value="Full")

    page_frame = tk.Frame(main)
    page_frame.pack(anchor="w", pady=5)

    tk.Radiobutton(
        page_frame, text="Full PDF", variable=page_mode_var,
        value="Full", font=("Arial", 11), cursor="hand2", highlightthickness=0
    ).grid(row=0, column=0, sticky="w", pady=2)

    tk.Radiobutton(
        page_frame, text="Select Page Range", variable=page_mode_var,
        value="Range", font=("Arial", 11), cursor="hand2", highlightthickness=0
    ).grid(row=1, column=0, sticky="w", pady=2)

    range_frame = tk.Frame(main)
    range_frame.pack(anchor="w", pady=(5, 20))

    tk.Label(range_frame, text="From:", font=("Arial", 11)).grid(row=0, column=0, padx=(0, 5))
    from_entry = tk.Entry(range_frame, width=5, relief="flat", borderwidth=1, font=("Arial", 11), bd=2)
    from_entry.grid(row=0, column=1, padx=5)

    tk.Label(range_frame, text="To:", font=("Arial", 11)).grid(row=0, column=2, padx=5)
    to_entry = tk.Entry(range_frame, width=5, relief="flat", borderwidth=1, font=("Arial", 11), bd=2)
    to_entry.grid(row=0, column=3, padx=5)

    # --- Conversion options ---
    options_frame = tk.Frame(main)
    options_frame.pack(anchor="w", pady=(0, 20))

    tk.Label(options_frame, text="1. PDF Category:", font=("Arial", 12)).grid(row=0, column=0, sticky="w", pady=5, padx=(0, 15))
    pdf_type_var = tk.StringVar()
    pdf_type_dropdown = ttk.Combobox(
        options_frame, textvariable=pdf_type_var, state="readonly",
        values=["Normal PDF", "Story with dialogues", "PDF with graphs and mathematical equations"],
        width=40
    )
    pdf_type_dropdown.grid(row=0, column=1, sticky="w", pady=5)
    pdf_type_dropdown.set("Normal PDF")
    # The trace is attached after the default is set, so it does not fire
    # before the dropdowns and buttons it touches exist.
    pdf_type_var.trace_add("write", toggle_voice_and_summary)

    tk.Label(options_frame, text="2. Content Mode:", font=("Arial", 12)).grid(row=1, column=0, sticky="w", pady=5, padx=(0, 15))
    summary_var = tk.StringVar()
    summary_dropdown = ttk.Combobox(
        options_frame, textvariable=summary_var, state="readonly",
        values=["Full PDF", "Summary"], width=25
    )
    summary_dropdown.grid(row=1, column=1, sticky="w", pady=5)
    summary_dropdown.set("Full PDF")

    tk.Label(options_frame, text="3. Voice Type:", font=("Arial", 12)).grid(row=2, column=0, sticky="w", pady=5, padx=(0, 15))
    voice_var = tk.StringVar()
    voice_dropdown = ttk.Combobox(
        options_frame, textvariable=voice_var, state="readonly",
        values=["Male", "Female"],
        width=25
    )
    voice_dropdown.grid(row=2, column=1, sticky="w", pady=5)
    voice_dropdown.set("Male")

    # --- Action buttons ---
    action_frame = tk.Frame(main)
    action_frame.pack(anchor="w", pady=(20, 0))

    start_button = tk.Button(
        action_frame,
        text="START PROCESSING",
        width=20,
        # Lambda defers the lookup: the three callables are defined further down.
        command=lambda: start_processing(on_audio_generated, reset_audio_controls_on_error),
        fg="white",
        font=("Arial", 14, "bold"),
        relief="flat",
        cursor="hand2",
        bd=0
    )
    start_button.grid(row=0, column=0, padx=10, pady=10)

    def download_audio_file():
        """Write the generated audio bytes to a user-chosen .mp3 file."""
        global audio_data

        if audio_data is None or len(audio_data) == 0:
            messagebox.showwarning("Warning", "Audio generation is not complete or failed. Please process the PDF first.")
            return

        try:
            output_audio_filename = filedialog.asksaveasfilename(
                defaultextension=".mp3",
                filetypes=[("MP3 files", "*.mp3")],
                initialfile="audiobook_output.mp3",
                title="Save Audio File"
            )
            if output_audio_filename:
                with open(output_audio_filename, "wb") as f:
                    f.write(audio_data)
                messagebox.showinfo("Success", f"File saved successfully as: {output_audio_filename}\nFile size: {len(audio_data) / 1024:.2f} KB")
            else:
                messagebox.showinfo("Cancelled", "File saving cancelled by user.")

        except Exception as e:
            messagebox.showerror("File Error", f"Failed to save file: {e}")

    download_button = tk.Button(
        action_frame,
        # U+2B07 (downwards arrow), escaped so the label survives any
        # re-encoding of the source file.
        text="\u2b07 Download Audio",
        width=20,
        command=download_audio_file,
        state="disabled",
        fg="white",
        font=("Arial", 14),
        relief="flat",
        cursor="hand2",
        bd=0
    )
    download_button.grid(row=0, column=1, padx=10, pady=10)

    def mark_api_key_missing():
        """Put the start button into its locked 'API KEY MISSING' state."""
        start_button.config(state="disabled", text="API KEY MISSING", bg="#FF3333")

    def reset_audio_controls(reset_start_button=False):
        """Discard generated audio and return the buttons to their idle state.

        With ``reset_start_button=True`` the start button is re-armed, unless
        no ElevenLabs client is available, in which case it stays locked.
        """
        global audio_data
        theme = COLORS[current_mode]
        audio_data = None
        download_button.config(state="disabled", bg=theme["btn_start_disabled"])
        root.title("PDF to Voice Converter")
        if reset_start_button:
            if ELEVENLABS_CLIENT is None:
                # Without a client no audio can be generated, so a reset must
                # never re-enable the start button.
                mark_api_key_missing()
            else:
                start_button.config(state="normal", text="START PROCESSING", bg=theme["btn_start"])

    def reset_audio_controls_on_error():
        """Error callback: reset everything, including the start button."""
        reset_audio_controls(reset_start_button=True)

    def on_audio_generated(data):
        """Success callback: keep the audio bytes and enable the download button."""
        global audio_data
        theme = COLORS[current_mode]

        audio_data = data
        download_button.config(state="normal", bg=theme["btn_download"])

        start_button.config(state="disabled", text="PROCESSING COMPLETE", bg=theme["btn_start_disabled"])

        root.title("Audio Ready - Download Now")
        messagebox.showinfo("Complete", "Audio generation complete. Click 'Download Audio' to save your file.")

    def start_processing(callback_success, callback_error):
        """Validate the form and hand the job to ``PDFVoiceBackEnd``.

        Args:
            callback_success: passed through to the backend; in this UI it is
                ``on_audio_generated``.
            callback_error: called here on validation failure and also passed
                through to the backend.
        """
        global file_path
        theme = COLORS[current_mode]

        if not file_path:
            messagebox.showerror("Error", "Please select a PDF file first!")
            return

        global audio_data
        audio_data = None
        download_button.config(state="disabled", bg=theme["btn_start_disabled"])
        start_button.config(state="disabled", text="...PROCESSING...", bg=theme["btn_start_disabled"])

        page_mode = page_mode_var.get()
        start_p = from_entry.get() if page_mode == "Range" else ""
        end_p = to_entry.get() if page_mode == "Range" else ""

        if page_mode == "Range":
            try:
                start_int = int(start_p)
                end_int = int(end_p)
                # isdigit() additionally rejects inputs int() would accept,
                # such as " 5 " or "+5".
                if not (start_p.isdigit() and end_p.isdigit() and start_int > 0 and end_int >= start_int):
                    raise ValueError("Invalid page range input.")
            except ValueError:
                messagebox.showerror("Error", "Please enter valid, positive page numbers for the range (From <= To).")
                callback_error()
                return

        pdf_type_choice = pdf_type_var.get()
        voice = voice_var.get()
        mode = summary_var.get()

        is_summary_or_expert_or_story = mode == "Summary" or pdf_type_choice == "PDF with graphs and mathematical equations" or pdf_type_choice == "Story with dialogues"

        # Check for Gemini key *only* if AI processing is required
        if is_summary_or_expert_or_story and GEMINI_CLIENT is None:
            messagebox.showerror("Error", f"The selected mode ('{mode}', '{pdf_type_choice}') requires a functional GEMINI API Key for AI processing.")
            callback_error()
            return

        # NOTE(review): presumably the backend invokes one of the two callbacks
        # when it finishes; confirm against PDFVoiceBackEnd.
        backend = PDFVoiceBackEnd(
            file_path, pdf_type_choice, mode, voice, start_p, end_p, root,
            callback_success,
            callback_error
        )
        backend.run()

    # Bring the widgets into their initial state and colours first.
    toggle_voice_and_summary()
    apply_theme()

    # Report the missing key after theming, so the red warning colour is not
    # immediately repainted by apply_theme().
    if ELEVENLABS_CLIENT is None:
        messagebox.showerror("Error", "ElevenLabs API Key not found. Audio generation is disabled.")
        mark_api_key_missing()

    root.mainloop()

# Entry point: runs when the cell/script is executed directly.
if __name__ == "__main__":
    # Start with no PDF chosen; start_processing treats an empty path as
    # "nothing selected" and shows an error instead of running.
    file_path = ""
    # Builds the window and blocks in the Tk main loop until it is closed.
    create_ui()