In [2]:
!pip install torch transformers librosa soundfile google-generativeai moviepy python-pptx Pillow pdf2image markdown-it-py[linkify,plugins] weasyprint pypandoc

Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting weasyprint
  Downloading weasyprint-65.1-py3-none-any.whl.metadata (3.7 kB)
Collecting pypandoc
  Downloading pypandoc-1.15-py3-none-any.whl.metadata (16 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Co

In [3]:
!apt-get update -qq && apt-get install -y -qq poppler-utils texlive-xetex texlive-fonts-recommended

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Extracting templates from packages: 100%
Preconfiguring packages ...
Selecting previously unselected package fonts-lato.
(Reading database ... 129184 files and directories currently installed.)
Preparing to unpack .../00-fonts-lato_2.0-2.1_all.deb ...
Unpacking fonts-lato (2.0-2.1) ...
Selecting previously unselected package tex-common.
Preparing to unpack .../01-tex-common_6.17_all.deb ...
Unpacking tex-common (6.17) ...
Selecting previously unselected package libkpathsea6:amd64.
Preparing to unpack .../02-libkpathsea6_2021.20210626.59705-1ubuntu0.2_amd64.deb ...
Unpacking libkpathsea6:amd64 (2021.20210626.59705-1ubuntu0.2) ...
Selecting previously unselected package libwoff1:amd64.
Preparing to unpack .../03-libwoff1_1.0.2-1build4_amd64.deb ...
Unpacking libwoff1:amd64 (1.0.2-1build4) ...
Selecting

In [None]:
AIzaSyDDL1dsiHNoCNkewZSnL3QCRx5mi0NpZuc

In [5]:
#!/usr/bin/env python
"""
transcript_to_handout.py – One‑stop pipeline for Kaggle
======================================================
Workflow
--------
1. **Media → WAV** – selects audio/video from `/kaggle/input` or `/kaggle/working`, extracts/​resamples to 16 kHz mono.
2. **Whisper** – transcribes with open‑source *whisper‑large‑v3-turbo*.
3. **Gemini‑2.5 flash** – **three** strictly separated calls:
   a. *Rewrite* raw transcript → book‑like draft (maximal fidelity).
   b. *Review* draft → fix theoretical errors, keep wording intact when correct.
   c. *Placeholder pass* → given slide snippets, insert `{{SLIDE:NNN}}` tokens.
4. **Slide export** – PPTX or PDF → numbered PNGs **and** short text snippets.
5. **Token swap** – regex replaces placeholders with real Markdown image links.
6. **PDF render** – tries Pandoc first; if that fails, falls back to an
   **HTML→PDF** route using *markdown‑it‑py* → *WeasyPrint* (robust on malformed
   Markdown).

Only the three Gemini calls consume tokens; everything else runs locally.
"""

# ────────────────────────────────────────────────────────────────────────────
# Imports
# ────────────────────────────────────────────────────────────────────────────
import os, re, subprocess, sys, tempfile, random, string, warnings
from pathlib import Path
from typing import Dict, List, Tuple
import io
import time

import librosa, soundfile as sf
import torch
from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor

# Kaggle secrets import
try:
    from kaggle_secrets import UserSecretsClient
    KAGGLE_SECRETS_AVAILABLE = True
except ImportError:
    KAGGLE_SECRETS_AVAILABLE = False
    print("Warning: kaggle_secrets not available, falling back to environment variables")

# Lazy imports for heavy/optional libs
MOVIEPY, PPTX, PDF2IMAGE, WEASYPRINT, PYPANDOC, PIL = [False] * 6

def _lazy(name: str):
    """Import helper that sets a global flag on success."""
    try:
        if name == "moviepy.editor":
            import moviepy.editor
            globals()["moviepy"] = moviepy
            globals()["MOVIEPY"] = True
        elif name == "pptx":
            import pptx
            globals()["pptx"] = pptx
            globals()["PPTX"] = True
        elif name == "pdf2image":
            import pdf2image
            globals()["pdf2image"] = pdf2image
            globals()["PDF2IMAGE"] = True
        elif name == "weasyprint":
            import weasyprint
            globals()["weasyprint"] = weasyprint
            globals()["WEASYPRINT"] = True
        elif name == "pypandoc":
            import pypandoc
            globals()["pypandoc"] = pypandoc
            globals()["PYPANDOC"] = True
        elif name == "PIL":
            from PIL import Image
            globals()["PIL"] = Image
            globals()["PIL"] = True
        else:
            module = __import__(name)
            globals()[name] = module
            globals()[name.upper()] = True
    except ImportError as e:
        print(f"Note: {name} not available - will install when needed")
        pass

# Try to import optional dependencies
_lazy("moviepy.editor")
_lazy("pptx")
_lazy("pdf2image")
_lazy("weasyprint")
_lazy("pypandoc")
_lazy("PIL")

import google.generativeai as genai

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# ────────────────────────────────────────────────────────────────────────────
# Helpers
# ────────────────────────────────────────────────────────────────────────────

def uid(n: int = 6) -> str:
    return "".join(random.choices(string.ascii_lowercase + string.digits, k=n))


def safe_pip(pkg: str):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])
        print(f"Successfully installed {pkg}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Failed to install {pkg}: {e}")
        return False


def get_api_key() -> str:
    """Get Gemini API key from Kaggle secrets or environment variables."""
    if KAGGLE_SECRETS_AVAILABLE:
        try:
            user_secrets = UserSecretsClient()
            api_key = user_secrets.get_secret("GEMINI_API_KEY")
            if api_key:
                return api_key
        except Exception as e:
            print(f"Failed to get API key from Kaggle secrets: {e}")
    
    # Fall back to environment variable
    return os.getenv("GEMINI_API_KEY", "")


# ────────────────────────────────────────────────────────────────────────────
# 1 ▸ Choose media
# ────────────────────────────────────────────────────────────────────────────
AUDIO_EXT = (".wav", ".mp3", ".ogg", ".m4a", ".aac", ".flac", ".webm")
VIDEO_EXT = (".mp4", ".mkv", ".mov", ".avi", ".flv")
MEDIA_EXT = AUDIO_EXT + VIDEO_EXT


def pick_media() -> Path:
    # Search both /kaggle/input and /kaggle/working
    search_dirs = ["/kaggle/input", "/kaggle/working"]
    choices = []
    
    for search_dir in search_dirs:
        if os.path.exists(search_dir):
            for r, _, fs in os.walk(search_dir):
                for f in fs:
                    if f.lower().endswith(MEDIA_EXT):
                        choices.append(Path(r) / f)
    
    if not choices:
        sys.exit("No audio or video found in /kaggle/input or /kaggle/working.")
    
    print("Available media files:")
    for i, p in enumerate(choices):
        # Show relative path from /kaggle/
        rel_path = p.relative_to("/kaggle") if str(p).startswith("/kaggle/") else p
        print(f" [{i}] {rel_path}")
    
    while True:
        try:
            idx = int(input(f"Choose file 0–{len(choices)-1}: "))
            if 0 <= idx < len(choices):
                return choices[idx]
            else:
                print(f"Please enter a number between 0 and {len(choices)-1}")
        except ValueError:
            print("Please enter a valid number")


def pick_slides() -> Path:
    """Select slide deck from /kaggle/input or /kaggle/working"""
    search_dirs = ["/kaggle/input", "/kaggle/working"]
    choices = []
    
    for search_dir in search_dirs:
        if os.path.exists(search_dir):
            for r, _, fs in os.walk(search_dir):
                for f in fs:
                    if f.lower().endswith((".pptx", ".pdf")):
                        choices.append(Path(r) / f)
    
    if not choices:
        print("⚠️ No slide deck (.pptx/.pdf) found in /kaggle/input or /kaggle/working")
        return None
    
    print("\nAvailable slide decks:")
    for i, p in enumerate(choices):
        rel_path = p.relative_to("/kaggle") if str(p).startswith("/kaggle/") else p
        print(f" [{i}] {rel_path}")
    
    print(f" [{len(choices)}] Skip slides (no slide integration)")
    
    while True:
        try:
            idx = int(input(f"Choose slide deck 0–{len(choices)} (or {len(choices)} to skip): "))
            if 0 <= idx < len(choices):
                return choices[idx]
            elif idx == len(choices):
                return None
            else:
                print(f"Please enter a number between 0 and {len(choices)}")
        except ValueError:
            print("Please enter a valid number")


# ────────────────────────────────────────────────────────────────────────────
# 2 ▸ Audio extraction/resample
# ────────────────────────────────────────────────────────────────────────────

def to_wav(src: Path, tmp: Path) -> Path:
    try:
        if src.suffix.lower() in VIDEO_EXT:
            if not MOVIEPY:
                if safe_pip("moviepy"):
                    _lazy("moviepy.editor")
                else:
                    raise RuntimeError("Could not install moviepy")
            from moviepy.editor import VideoFileClip
            with VideoFileClip(str(src)) as clip:
                if clip.audio is None:
                    raise ValueError("Video file has no audio track")
                clip.audio.write_audiofile(str(tmp), codec="pcm_s16le", logger=None, verbose=False)
        else:
            y, _ = librosa.load(str(src), sr=16000, mono=True)
            sf.write(str(tmp), y, 16000)
        return tmp
    except Exception as e:
        raise RuntimeError(f"Failed to process audio from {src}: {e}")


# ────────────────────────────────────────────────────────────────────────────
# 3 ▸ Whisper transcription
# ────────────────────────────────────────────────────────────────────────────

def whisper_transcribe(wav: Path) -> str:
    try:
        # Load Whisper model & tokenizer
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")
        
        # Using whisper-large-v3-turbo as requested
        model_id = "openai/whisper-large-v3-turbo"
        processor = WhisperProcessor.from_pretrained(model_id)
        model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
        
        # Force English transcription
        language = "english"
        task = "transcribe"
        forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
        
        # Create pipeline with chunk_length to avoid timestamp issues
        asr = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            return_timestamps=True,  # Disable timestamps to avoid the error
            #chunk_length_s=30,  # Process in 30-second chunks
            device=0 if device == "cuda" else -1,
            generate_kwargs={"forced_decoder_ids": forced_decoder_ids}
        )
        
        result = asr(str(wav))
        return result["text"]
    except Exception as e:
        print(f"Error with whisper-large-v3-turbo: {e}")
        print("Falling back to basic transcription...")
        try:
            # Simple fallback without timestamps
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model_id = "openai/whisper-large-v3-turbo"
            processor = WhisperProcessor.from_pretrained(model_id)
            model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
            
            # Load audio
            audio, sr = librosa.load(str(wav), sr=16000)
            inputs = processor(audio, sampling_rate=16000, return_tensors="pt").to(device)
            
            # Generate without timestamps
            with torch.no_grad():
                predicted_ids = model.generate(**inputs, forced_decoder_ids=forced_decoder_ids)
            
            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            return transcription
        except Exception as e2:
            raise RuntimeError(f"Transcription failed with both methods: {e}, {e2}")


# ────────────────────────────────────────────────────────────────────────────
# 4 ▸ Gemini triple‑step
# ────────────────────────────────────────────────────────────────────────────
GEM_PROMPT_REWRITE = (
    "You are an expert academic writer and editor, tasked with transforming a raw lecture transcript into a meticulously structured, exceptionally clear, and highly readable handout suitable for students. Your goal is to make complex topics easily digestible and the lecture's content flow naturally.\n\n"
    "**Input:** A raw, unprocessed lecture transcript.\n\n"
    "**Primary Goal:** Create a 'book-style' draft of a student handout that meticulously follows the lecture's content and professor's explanations. It should be SUPER CLEAR, fluid to read, and engaging.\n\n"
    "**Detailed Instructions:**\n"
    "1.  **High Fidelity & Clarity:**\n"
    "    *   Preserve all core information, concepts, and the professor's explanations from the transcript with maximal fidelity.\n"
    "    *   Rewrite for exceptional clarity and flow. Rephrase awkward sentences, fix grammatical errors, and ensure smooth transitions between topics.\n"
    "    *   Eliminate verbal disfluencies (e.g., 'um', 'uh', 'like', 'you know'), repetitions, and false starts, UNLESS a false start or a particular phrasing is crucial for understanding the professor's thought process or a specific nuance.\n"
    "2.  **Structure & Formatting (CRITICAL for Readability):\n"
    "    *   Organize the content logically. Introduce clear main headings (e.g., `## Main Topic`) and subheadings (e.g., `### Sub-topic`) to break down the lecture into digestible sections. Infer these from the natural flow and topic shifts in the transcript.\n"
    "    *   Use bullet points (`* item`) or numbered lists (`1. item`) for enumerations, steps, key takeaways, or lists of features/examples to enhance scannability and clarity.\n"
    "    *   Employ bold text (`**bold**`) for emphasis on key terms or concepts, but use it judiciously.\n"
    "    *   Ensure paragraphs are well-formed and focus on a single idea or a few closely related ideas.\n"
    "3.  **Tone & Style:**\n"
    "    *   Adopt a tone that is authoritative yet accessible and engaging – imagine a knowledgeable professor explaining concepts directly and clearly to a student.\n"
    "    *   The style should be more formal than a direct transcript but less dry than a dense academic paper. It's a 'handout', so it should be inviting to read.\n"
    "4.  **Content Enhancement (Extrapolation from Explanation):\n"
    "    *   If the professor alludes to a concept or explains it briefly, and the context allows for a slight, logical expansion to improve student understanding, you may add a concise explanatory sentence. Stick closely to what can be reasonably inferred from the professor's words.\n"
    "    *   Define jargon or technical terms when they are first introduced or if the professor provides a definition.\n"
    "5.  **Handling Uncertainty:**\n"
    "    *   If parts of the transcript are genuinely inaudible or unintelligible, mark the uncertain section with `(?)`.\n"
    "    *   At the very end of the handout, create a distinct section titled `### Notes for Reviewer` and list any significant uncertainties or points where the professor's meaning was ambiguous.\n"
    "6.  **IMPORTANT FORMATTING NOTE (to prevent PDF issues):\n"
    "    *   Use spaces for ALL indentation in the Markdown output. Do NOT use tab characters, as these are often misinterpreted as code blocks by Markdown renderers.\n\n"
    "**Output Format:** Clean, well-structured Markdown text."
)

GEM_PROMPT_REVIEW = (
    "You are a subject matter expert tasked with reviewing a draft student handout that was generated from a lecture transcript. The previous step focused on transforming the raw transcript into a clear, well-structured, and readable 'book-style' document, aiming for high fidelity to the original lecture.\n\n"
    "**Input:** A draft handout in Markdown format.\n\n"
    "**Primary Goal:** Review the handout *exclusively* for factual and theoretical errors related to the subject matter. Ensure the content is accurate and does not misrepresent any concepts discussed in the (implied) original lecture.\n\n"
    "**Detailed Instructions:**\n"
    "1.  **Accuracy First:** Your primary concern is the correctness of the information.\n"
    "2.  **Minimal Changes:**\n"
    "    *   Correct errors concisely and precisely.\n"
    "    *   **Crucially, preserve the established writing style, structure (headings, lists, etc.), clarity, and overall wording of the draft as much as possible.** Only alter wording if it's essential to correct a factual/theoretical error or to prevent a significant misunderstanding stemming from an inaccuracy.\n"
    "    *   Do NOT make stylistic changes if the existing text is already clear and theoretically sound. Do not rephrase for minor improvements if the current text is correct.\n"
    "3.  **Integration:** Ensure your corrections integrate seamlessly into the existing text, maintaining the flow and tone.\n"
    "4.  **Focus:** Do not add new information or sections that weren't implied by the original rewrite. Your task is error correction, not expansion at this stage.\n\n"
    "**Output Format:** The reviewed and corrected handout in the same Markdown format."
)

GEM_PROMPT_PLACE = (
    "You are an expert in seamlessly integrating visual aids (slides) into educational text to create comprehensive and highly understandable learning materials. Your goal is to enrich an existing, well-structured, book-like handout with relevant content from a provided slide deck, ensuring the final document is SUPER CLEAR and fluid to read.\n\n"
    "**Input:**\n"
    "1.  A discoursive, well-structured handout (already rewritten for clarity and reviewed for accuracy).\n"
    "2.  A list of (Slide Number `NNN`, Text Snippet from Slide) pairs.\n"
    "3.  Potentially, the full slide deck as a PDF for better visual context (if provided).\n\n"
    "**Primary Goal:** Intelligently integrate slide information into the handout to enhance student understanding, maintaining the handout's established high-quality narrative style and clarity. The integration should feel natural and purposeful.\n\n"
    "**Guidelines for Slide Integration:**\n"
    "1.  **Preserve Handout Foundation:** The handout's current text is the primary narrative. Integrate slide information to complement and expand upon it, not to replace existing clear explanations.\n"
    "2.  **Evaluate Visual Necessity (using slide snippets and full deck if available):\n"
    "    *   For each slide, determine if its *visual elements* (diagrams, charts, complex images, unique layouts) are crucial for understanding the corresponding topic in the handout, beyond what text alone can convey.\n"
    "3.  **Integrate Content Strategically:**\n"
    "    a.  **Visually Essential Slides:** If a slide's visual content is essential, insert a placeholder `{{SLIDE:NNN}}` (e.g., `{{SLIDE:001}}`) on its own dedicated line where it is most relevant to the handout's discussion. \n"
    "        *   You MAY add a very brief (one sentence maximum) lead-in or follow-up sentence to naturally bridge the text with the visual if it improves flow. For example: 'The following diagram illustrates this process: {{SLIDE:007}} As the diagram shows...'\n"
    "    b.  **Text-Heavy or Redundant Visual Slides:** If a slide primarily contains text (bullet points, simple statements) or visuals that are adequately described or made redundant by the handout's existing text, or whose content can be smoothly woven into the narrative: **DO NOT use a `{{SLIDE:NNN}}` placeholder.** Instead, integrate its *informational content* directly into the handout's narrative. \n"
    "        *   **Rephrase and expand upon the slide's textual content** to fit the discoursive, explanatory, and SUPER CLEAR style of the handout. Do not just copy-paste bullet points; convert them into flowing sentences and paragraphs that enrich the existing text.\n"
    "4.  **Placement of `{{SLIDE:NNN}}` Placeholders:** When used, ensure `{{SLIDE:NNN}}` appears on its own dedicated line in the Markdown. This is critical for later visual formatting. Place it at the most logical point where the handout discusses the content visually represented on that slide.\n"
    "5.  **Clarity and Fruibility Over Conciseness:** Prioritize making concepts easy to grasp. If explaining a slide's content textually makes the handout clearer and more fluid, do so. If a visual is *also* critical, include the placeholder as well. The aim is a comprehensive, understandable document.\n"
    "6.  **Alignment:** Ensure any integrated textual content or slide placeholder is perfectly aligned with the part of the handout discussing that specific topic or concept.\n"
    "7.  **Maintain Style:** All new textual integrations from slides must match the established clear, engaging, and fluid narrative style of the handout.\n\n"
    "**Output Format:** The updated handout in Markdown, with slide placeholders and integrated slide text as specified."
)


def gemini_call(content: str, prompt: str, model, pdf_file: Path = None) -> str:
    try:
        if pdf_file and pdf_file.exists():
            # Upload PDF file to Gemini
            pdf_file_obj = genai.upload_file(str(pdf_file))
            # Wait for file processing
            time.sleep(2)
            response = model.generate_content([f"{prompt}\n\n---\n\n{content}", pdf_file_obj])
        else:
            response = model.generate_content(f"{prompt}\n\n---\n\n{content}")
        return response.text
    except Exception as e:
        print(f"Gemini API call failed: {e}")
        return content  # Return original content if API fails


def gemini_pipeline(raw: str, slides_info: List[Tuple[int, str]], api_key: str, pdf_deck: Path = None) -> str:
    if not api_key:
        print("No GEMINI_API_KEY – skipping LLM steps.")
        return raw
    
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20") #KEEP THIS GEMINI FOR GOD SAKE

        print("Step 1: Rewriting transcript...")
        step1 = gemini_call(raw, GEM_PROMPT_REWRITE, model)
        
        print("Step 2: Reviewing for errors...")
        step2 = gemini_call(step1, GEM_PROMPT_REVIEW, model)

        if slides_info:
            print("Step 3: Inserting slide placeholders...")
            # Prepare brief slide list for prompt (truncate text to 120 chars each)
            slide_blob = "\n".join([f"{n:03}: {txt[:120]}" for n, txt in slides_info])
            place_prompt = GEM_PROMPT_PLACE + "\nSlides:\n" + slide_blob
            
            # Pass PDF file to the third Gemini call if available
            step3 = gemini_call(step2, place_prompt, model, pdf_deck)
            return step3
        else:
            print("No slides available, skipping placeholder step.")
            return step2
            
    except Exception as e:
        print(f"Gemini pipeline failed: {e}")
        return raw


# ────────────────────────────────────────────────────────────────────────────
# 5 ▸ Slide export + snippets
# ────────────────────────────────────────────────────────────────────────────

def export_slides(deck: Path, out_dir: Path) -> List[Tuple[int, str]]:
    out_dir.mkdir(exist_ok=True, parents=True)
    snippets: List[Tuple[int, str]] = []

    try:
        if deck.suffix.lower() == ".pptx":
            if not PPTX: # Check if pptx module is loaded
                if safe_pip("python-pptx Pillow"):
                    _lazy("pptx")
                    _lazy("PIL") # Pillow is also needed for image operations with pptx text
                else:
                    print("ERROR: python-pptx and Pillow could not be installed. Cannot process PPTX slides.")
                    return [] # Return empty if dependencies fail

            # Ensure PIL is available if pptx was already loaded but PIL wasn't
            if not PIL:
                if safe_pip("Pillow"):
                    _lazy("PIL")
                else:
                    print("ERROR: Pillow could not be installed. Cannot create placeholder images for PPTX.")
                    # Decide if you want to return [] or proceed without images but with text
                    # For consistency, if images are expected, let's return empty if PIL fails.
                    return []


            from pptx import Presentation
            # This import needs to be here or globally if PIL is always expected
            from PIL import Image, ImageDraw, ImageFont

            prs = Presentation(str(deck))
            print(f"Processing PPTX file: {deck.name} with {len(prs.slides)} slides.")

            for idx, slide in enumerate(prs.slides, 1):
                texts = []
                for shape in slide.shapes:
                    if hasattr(shape, "text_frame") and shape.text_frame and shape.text_frame.text:
                        texts.append(shape.text_frame.text)
                    elif hasattr(shape, "text") and shape.text: # For shapes like text boxes not in a frame
                        texts.append(shape.text)

                snippet_text = " ".join(texts).strip().replace('\n', ' ').replace('\r', ' ')
                snippet_text = re.sub(r'\s+', ' ', snippet_text) # Consolidate multiple spaces
                snippets.append((idx, snippet_text if snippet_text else f"Slide {idx} (PPTX - No text found)"))

                # Create a placeholder image
                try:
                    img = Image.new('RGB', (1024, 768), color='white') # Slightly higher res placeholder
                    draw = ImageDraw.Draw(img)
                    
                    try:
                        font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
                        title_font = ImageFont.truetype(font_path, 40)
                        text_font = ImageFont.truetype(font_path, 24)
                    except IOError:
                        print(f"Warning: DejaVuSans font not found at {font_path}. Using default font.")
                        title_font = ImageFont.load_default()
                        text_font = ImageFont.load_default() # May need to adjust size if default is tiny

                    draw.text((50, 50), f"Slide {idx:03}", fill='black', font=title_font)
                    
                    # Simple text wrapping for placeholder
                    if snippet_text:
                        display_text = snippet_text
                        lines = []
                        max_chars_per_line = 70 # Adjust based on image width and font
                        for i in range(0, len(display_text), max_chars_per_line):
                            lines.append(display_text[i:i+max_chars_per_line])
                        
                        y_text = 120
                        for line_idx, line in enumerate(lines):
                            if line_idx < 15: # Limit number of lines to prevent overflow
                                draw.text((50, y_text), line, fill='darkgray', font=text_font)
                                y_text += 30 # Adjust line spacing
                            else:
                                draw.text((50, y_text), "...", fill='darkgray', font=text_font)
                                break
                    else:
                        draw.text((50,120), "(No text content extracted from slide)", fill='gray', font=text_font)

                    img_path = out_dir / f"{idx:03}.png"
                    img.save(img_path)
                    # print(f"Saved placeholder image: {img_path}")

                except Exception as e_img:
                    print(f"Warning: Could not create placeholder image for PPTX slide {idx}: {e_img}")

        elif deck.suffix.lower() == ".pdf":
            # First try to install poppler-utils for pdf2image
            try:
                # Check if already installed to avoid redundant calls
                result = subprocess.run(["dpkg", "-s", "poppler-utils"], capture_output=True, text=True)
                if result.returncode != 0: # Not installed
                    print("poppler-utils not found, attempting installation...")
                    subprocess.check_call(["apt-get", "update", "-qq"])
                    subprocess.check_call(["apt-get", "install", "-y", "poppler-utils", "-qq"])
                    print("Successfully installed poppler-utils")
                else:
                    print("poppler-utils is already installed.")
            except Exception as e:
                print(f"Warning: Could not ensure poppler-utils installation: {e}")
                # This is a critical dependency for PDF to image conversion
                # If it fails, pdf2image will likely fail.

            if not PDF2IMAGE:
                if safe_pip("pdf2image"):
                    _lazy("pdf2image")
                else:
                    print("ERROR: pdf2image could not be installed. Cannot process PDF slides to images.")
                    # Fall through to placeholder creation if PIL is available
                    # or return empty if we must have actual images.
                    # For now, let the placeholder logic attempt if PIL is there.
                    pass

            # Ensure PIL is available for placeholders if PDF conversion fails
            if not PIL:
                if safe_pip("Pillow"):
                    _lazy("PIL")
                # If Pillow still fails, placeholder creation will fail.

            pages_converted = False
            if PDF2IMAGE: # Only attempt if pdf2image was successfully loaded
                from pdf2image import convert_from_path, pdfinfo_from_path
                try:
                    # Get page count to avoid fixed range in placeholder fallback
                    pdf_info = pdfinfo_from_path(str(deck))
                    num_pdf_pages = pdf_info.get("Pages", 0)
                    print(f"PDF Info: Found {num_pdf_pages} pages in {deck.name}")

                    print(f"Attempting PDF to PNG conversion with DPI 250 for {deck.name}...")
                    pages = convert_from_path(str(deck), dpi=250, timeout=300) # Added timeout
                    if not pages:
                        print("Warning: convert_from_path (DPI 250) returned no pages.")
                    else:
                        for idx, page in enumerate(pages, 1):
                            page_path = out_dir / f"{idx:03}.png"
                            page.save(page_path, "PNG")
                            snippets.append((idx, f"Slide {idx} (PDF Page Image)"))
                            # print(f"Saved PDF page image: {page_path}")
                        pages_converted = True
                        print(f"Successfully converted {len(pages)} pages with DPI 250.")
                except Exception as e_dpi250:
                    print(f"Error converting PDF pages with DPI 250: {e_dpi250}")
                    print("Trying with lower DPI (150)...")
                    try:
                        pages = convert_from_path(str(deck), dpi=150, timeout=300) # Added timeout
                        if not pages:
                            print("Warning: convert_from_path (DPI 150) returned no pages.")
                        else:
                            for idx, page in enumerate(pages, 1):
                                page_path = out_dir / f"{idx:03}.png"
                                page.save(page_path, "PNG")
                                snippets.append((idx, f"Slide {idx} (PDF Page Image)"))
                                # print(f"Saved PDF page image: {page_path}")
                            pages_converted = True
                            print(f"Successfully converted {len(pages)} pages with DPI 150.")
                    except Exception as e_dpi150:
                        print(f"Error converting PDF pages even with lower DPI (150): {e_dpi150}")
            else: # pdf2image not available
                print("pdf2image module not available. Cannot convert PDF pages to images.")
                # Try to get page count for placeholder loop if possible, otherwise use default
                try:
                    from pdf2image import pdfinfo_from_path # Try one last time for info
                    pdf_info = pdfinfo_from_path(str(deck))
                    num_pdf_pages = pdf_info.get("Pages", 10) # Default to 10 if can't get info
                except:
                    num_pdf_pages = 10 # Default if pdfinfo also fails

            # Fallback to placeholder images if no pages were converted AND PIL is available
            if not pages_converted:
                if not PIL:
                    print("ERROR: Pillow (PIL) is not available. Cannot create placeholder images for PDF.")
                else:
                    from PIL import Image, ImageDraw, ImageFont # Re-import just in case
                    print("PDF image conversion failed. Creating placeholder images...")
                    
                    # Determine number of placeholders to create
                    # If num_pdf_pages was successfully fetched, use it. Otherwise, a default.
                    # This variable 'num_pdf_pages' needs to be defined if we reach here and pdf2image was not available.
                    # Let's ensure num_pdf_pages is defined.
                    if 'num_pdf_pages' not in locals(): # If pdf2image wasn't even tried
                        try:
                            # Attempt to get page count if pdf2image could be imported for pdfinfo_from_path
                            # This is a bit redundant but covers a case where PDF2IMAGE flag is false
                            # but the module *might* still be importable for pdfinfo.
                            temp_pdf2image_module = __import__("pdf2image")
                            pdf_info = temp_pdf2image_module.pdfinfo_from_path(str(deck))
                            num_pdf_pages_for_fallback = pdf_info.get("Pages", 10)
                        except:
                            num_pdf_pages_for_fallback = 10 # Final fallback page count
                    else:
                        num_pdf_pages_for_fallback = num_pdf_pages if num_pdf_pages > 0 else 10


                    for i in range(1, num_pdf_pages_for_fallback + 1):
                        try:
                            img = Image.new('RGB', (1024, 768), color='lightgray')
                            draw = ImageDraw.Draw(img)
                            try:
                                font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
                                font = ImageFont.truetype(font_path, 60)
                                small_font = ImageFont.truetype(font_path, 30)
                            except IOError:
                                font = ImageFont.load_default()
                                small_font = ImageFont.load_default()

                            draw.text((img.width // 2, img.height // 2 - 50), f"Slide {i:03}", fill='black', font=font, anchor="mm")
                            draw.text((img.width // 2, img.height // 2 + 20), "(PDF Image Conversion Failed)", fill='red', font=small_font, anchor="mm")
                            
                            placeholder_path = out_dir / f"{i:03}.png"
                            img.save(placeholder_path)
                            snippets.append((i, f"Slide {i} (Placeholder - PDF Error)"))
                            # print(f"Saved placeholder image: {placeholder_path}")
                        except Exception as e_placeholder:
                            print(f"Warning: Could not create placeholder image for PDF slide {i}: {e_placeholder}")
                            break # Stop if placeholder creation fails for one
        else:
            raise ValueError(f"Unsupported deck format: {deck.suffix}")

    except Exception as e_main:
        print(f"Major error in export_slides function: {e_main}")
        # Depending on severity, you might want to clear snippets or re-raise
        return [] # Return empty list on major failure

    if not snippets:
        print("Warning: No slides or snippets were extracted/created.")
    return snippets

# ────────────────────────────────────────────────────────────────────────────
# 6 ▸ Replace {{SLIDE:NNN}} tokens → Markdown images
# ────────────────────────────────────────────────────────────────────────────

# def inject_images(md: str, slides_dir: Path) -> str:
#     pat = re.compile(r"\{\{SLIDE:(\d{3})(?:[^}]*)\}\}")
    
#     def replace_token(match):
#         slide_num = match.group(1)
#         image_path = slides_dir / f"{slide_num}.png"
#         if image_path.exists():
#             return f"![Slide {slide_num}]({slides_dir}/{slide_num}.png)"
#         else:
#             return f"[Slide {slide_num} - Image not found]"
    
#     return pat.sub(replace_token, md)

def inject_images(md: str, slides_dir: Path) -> str:
    # Regex to find {{SLIDE:NNN}} or {{SLIDE:NNN anything else}}
    pat = re.compile(r"\{\{SLIDE:(\d{3})(?:[^}]*)\}\}")

    def replace_token(match):
        slide_num_str = match.group(1) # Slide number as string, e.g., "001"
        # Define a desired width for slides, e.g., 60% of the page width.
        # Pandoc attribute for width: {width=60%}
        # Add a class for specific CSS styling in WeasyPrint: .slide-image
        # Adding newlines before and after to encourage block display and separation.
        image_path = slides_dir / f"{slide_num_str}.png"
        if image_path.exists():
            # Alt text will be "Slide NNN", which can become a caption in Pandoc
            alt_text = f"Slide {int(slide_num_str)}" # Convert to int for cleaner text
            return f"\n\n![{alt_text}]({slides_dir}/{slide_num_str}.png){{width=60% .slide-image}}\n\n"
        else:
            return f"\n\n[Slide {int(slide_num_str)} - Image not found]\n\n"

    return pat.sub(replace_token, md)


# ────────────────────────────────────────────────────────────────────────────
# 7 ▸ Markdown → PDF (Enhanced with better error handling)
# ────────────────────────────────────────────────────────────────────────────

def md_to_pdf(md: Path, pdf: Path):
    try:
        # Try installing texlive for pandoc PDF generation
        try:
            subprocess.check_call(["apt-get", "update", "-qq"])
            subprocess.check_call(["apt-get", "install", "-y", "texlive-xetex", "texlive-fonts-recommended", "-qq"])
            print("Successfully installed texlive")
        except Exception as e:
            print(f"Warning: Could not install texlive: {e}")
        
        # Try pandoc with xelatex engine
        try:
            subprocess.run(["pandoc", "--version"], check=True, capture_output=True)
            subprocess.run([
                "pandoc", str(md), "-o", str(pdf), 
                "--pdf-engine=xelatex",
                "--variable", "geometry:margin=1in"
            ], check=True)
            print("PDF generated successfully with pandoc")
            return
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Pandoc with xelatex failed: {e}")
    
    except Exception as e:
        print(f"Pandoc route failed: {e}")
    
    # Fallback to WeasyPrint
    print("Falling back to WeasyPrint...")
    try:
        if not WEASYPRINT:
            if safe_pip("markdown-it-py[linkify,plugins] weasyprint"):
                _lazy("weasyprint")
            else:
                raise RuntimeError("Could not install weasyprint dependencies")
        
        # Import markdown-it-py
        from markdown_it import MarkdownIt
        from weasyprint import HTML, CSS
        
        md_text = md.read_text(encoding="utf-8")
        mdit = MarkdownIt("commonmark", {"linkify": True})
        html = mdit.render(md_text)
        
        # Wrap in basic HTML structure with better styling
        full_html = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <style>
                body {{ 
                    font-family: Arial, sans-serif; 
                    margin: 40px; 
                    line-height: 1.6;
                }}
                img.slide-image {{ /* Target only images with the .slide-image class */
                            max-width: 60%;  /* Adjust percentage as needed for "small" */
                            height: auto;
                            display: block;       /* Crucial for margin:auto to work */
                            margin-left: auto;    /* Center the image block */
                            margin-right: auto;   /* Center the image block */
                            margin-top: 20px;     /* Space above the image */
                            margin-bottom: 20px;  /* Space below the image */
                        }}
                        img:not(.slide-image) {{ /* Styling for any other images if they exist */
                            max-width: 100%;
                            height: auto;
                            display: block; /* Optional: center other images too */
                            margin: 15px auto; /* Optional: center other images too */
                        }}
                        h1, h2, h3 {{
                    color: #333;
                    margin-top: 30px;
                }}
                p {{
                    margin-bottom: 15px;
                }}
                code {{
                    background-color: #f4f4f4;
                    padding: 2px 4px;
                    border-radius: 3px;
                }}
            </style>
        </head>
        <body>
        {html}
        </body>
        </html>
        """
        
        HTML(string=full_html, base_url=str(md.parent)).write_pdf(str(pdf))
        print("PDF generated successfully with WeasyPrint")
        
    except Exception as e:
        print(f"WeasyPrint also failed: {e}")
        print("Unable to generate PDF. Markdown file saved successfully.")


# ────────────────────────────────────────────────────────────────────────────
# 8 ▸ Orchestrator
# ────────────────────────────────────────────────────────────────────────────

def main():
    print("🎵 Transcript to Handout Pipeline")
    print("=" * 40)
    
    work_id = uid()
    
    # Step 1: Pick slide deck FIRST (as requested)
    print("\n1. Selecting slide deck...")
    deck = pick_slides()
    if deck:
        print(f"Selected slide deck: {deck}")
    else:
        print("No slide deck selected - continuing without slide integration")
    
    # Step 2: Pick media file
    print("\n2. Selecting media file...")
    media = pick_media()
    print(f"Selected: {media}")
    
    # Step 3: Convert to WAV
    print("\n3. Converting to WAV...")
    wav = Path(f"audio_{work_id}.wav")
    to_wav(media, wav)
    
    # Step 4: Transcribe
    print("\n4. Transcribing audio...")
    transcript = whisper_transcribe(wav)
    print(f"Transcript length: {len(transcript)} characters")
    
    # Step 5: Process slides
    print("\n5. Processing slides...")
    slides_dir = Path(f"slides_{work_id}")
    slides_info: List[Tuple[int, str]] = []
    
    if deck:
        print(f"Exporting slides from {deck}...")
        slides_info = export_slides(deck, slides_dir)
        print(f"Exported {len(slides_info)} slides")
    
    # Step 6: Process with Gemini
    print("\n6. Processing with Gemini...")
    api_key = get_api_key()
    
    # Pass the PDF deck to Gemini if it's a PDF file
    pdf_deck = deck if deck and deck.suffix.lower() == ".pdf" else None
    final_md = gemini_pipeline(transcript, slides_info, api_key, pdf_deck)
    
    # Step 7: Save markdown with images
    print("\n7. Generating final markdown...")
    md_path = Path(f"handout_{work_id}.md")
    if slides_info and slides_dir.exists():
        final_content = inject_images(final_md, slides_dir)
    else:
        final_content = final_md
    
    md_path.write_text(final_content, encoding="utf-8")
    print(f"Markdown saved: {md_path}")
    
    # Step 8: Generate PDF
    print("\n8. Generating PDF...")
    pdf_path = Path(f"handout_{work_id}.pdf")
    md_to_pdf(md_path, pdf_path)
    
    if pdf_path.exists():
        print(f"✅ Success! Output files:")
        print(f"   📄 Markdown: {md_path}")
        print(f"   📄 PDF: {pdf_path}")
        if slides_dir.exists():
            print(f"   🖼️  Slides: {slides_dir}/")
    else:
        print(f"⚠️  PDF generation failed, but markdown is available: {md_path}")
    
    # Cleanup
    print("\n9. Cleaning up...")
    wav.unlink(missing_ok=True)
    print("Done!")


if __name__ == "__main__":
    main()

🎵 Transcript to Handout Pipeline

1. Selecting slide deck...

Available slide decks:
 [0] input/gdl-lect3-and-5/slides_lect5a.pdf
 [1] input/gdl-lect3-and-5/slides_lect3.pdf
 [2] input/gdl-lect3-and-5/slides_lect_5b.pdf
 [3] working/handout_86ene9.pdf
 [4] working/handout_o9nrjg.pdf
 [5] Skip slides (no slide integration)


Choose slide deck 0–5 (or 5 to skip):  2


Selected slide deck: /kaggle/input/gdl-lect3-and-5/slides_lect_5b.pdf

2. Selecting media file...
Available media files:
 [0] input/gdl-lect3-and-5/lecture5b_audioOnly.wav
 [1] input/gdl-lect3-and-5/lecture5a_audioonly.wav
 [2] input/gdl-lect3-and-5/lecture3_audioOnly.wav
 [3] working/audio_bmm2zr.wav


Choose file 0–3:  0


Selected: /kaggle/input/gdl-lect3-and-5/lecture5b_audioOnly.wav

3. Converting to WAV...

4. Transcribing audio...
Using device: cuda


Device set to use cuda:0


Transcript length: 97550 characters

5. Processing slides...
Exporting slides from /kaggle/input/gdl-lect3-and-5/slides_lect_5b.pdf...
poppler-utils is already installed.
PDF Info: Found 84 pages in slides_lect_5b.pdf
Attempting PDF to PNG conversion with DPI 250 for slides_lect_5b.pdf...
Successfully converted 84 pages with DPI 250.
Exported 84 slides

6. Processing with Gemini...
Step 1: Rewriting transcript...
Step 2: Reviewing for errors...
Step 3: Inserting slide placeholders...

7. Generating final markdown...
Markdown saved: handout_8003ue.md

8. Generating PDF...


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


Successfully installed texlive
Pandoc with xelatex failed: [Errno 2] No such file or directory: 'pandoc'
Falling back to WeasyPrint...
PDF generated successfully with WeasyPrint
✅ Success! Output files:
   📄 Markdown: handout_8003ue.md
   📄 PDF: handout_8003ue.pdf
   🖼️  Slides: slides_8003ue/

9. Cleaning up...
Done!


In [6]:
!zip -r slides_8003ue.zip slides_8003ue

  adding: slides_8003ue/ (stored 0%)
  adding: slides_8003ue/046.png (deflated 6%)
  adding: slides_8003ue/081.png (deflated 23%)
  adding: slides_8003ue/084.png (deflated 13%)
  adding: slides_8003ue/051.png (deflated 6%)
  adding: slides_8003ue/037.png (deflated 3%)
  adding: slides_8003ue/017.png (deflated 11%)
  adding: slides_8003ue/033.png (deflated 13%)
  adding: slides_8003ue/028.png (deflated 12%)
  adding: slides_8003ue/060.png (deflated 15%)
  adding: slides_8003ue/064.png (deflated 17%)
  adding: slides_8003ue/031.png (deflated 12%)
  adding: slides_8003ue/013.png (deflated 13%)
  adding: slides_8003ue/079.png (deflated 14%)
  adding: slides_8003ue/074.png (deflated 12%)
  adding: slides_8003ue/057.png (deflated 6%)
  adding: slides_8003ue/050.png (deflated 13%)
  adding: slides_8003ue/026.png (deflated 14%)
  adding: slides_8003ue/045.png (deflated 7%)
  adding: slides_8003ue/047.png (deflated 4%)
  adding: slides_8003ue/075.png (deflated 4%)
  adding: slides_8003ue/063.pn