# Catalan Lecture Processor

Upload a university lecture recorded in Catalan and get:
- **Full transcription** (using Whisper fine-tuned for Catalan)
- **Cleaned text** (filler words removed, restructured into paragraphs)
- **Translations** (Spanish, English, Portuguese, Italian)
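- **Summary** with key concepts (requires the optional Gemini API key from Cell 3)
- **PowerPoint slides** (built from the summary, so they also require the Gemini API key)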

## Instructions
1. **Runtime > Change runtime type > T4 GPU** (important!)
2. **Runtime > Run all** (or click each cell in order)
3. Wait ~2-3 minutes for models to download
4. A **public link** will appear at the bottom - open it on your phone or computer
5. Upload your audio file, select languages, click **Process Lecture**

In [None]:
# Cell 1: Check GPU
!nvidia-smi
import torch
print(f"\nCUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_mem / 1e9:.1f} GB")
else:
    print("WARNING: No GPU detected! Go to Runtime > Change runtime type > T4 GPU")

In [None]:
# Cell 2: Install dependencies
%%capture
!pip install -q transformers accelerate gradio python-pptx
!pip install -q google-genai huggingface-hub faster-whisper
!pip install -q sentencepiece protobuf pydub tqdm
!apt-get -qq install -y ffmpeg
print("Dependencies installed!")

In [None]:
# Cell 3: Optional - Gemini API key
# A FREE key can be created at https://ai.google.dev/
# With a key, the pipeline adds LLM text cleanup and summarization (slides are
# generated from the summary). Leave the value empty to skip those steps; text
# cleanup then relies on the regex pass only.

import os

GEMINI_API_KEY = ""  # @param {type:"string"}

# Publish the key through the environment, which is where the pipeline classes
# look it up.
if not GEMINI_API_KEY:
    print("No Gemini key - text cleanup will use regex only (still works fine)")
else:
    os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY
    print("Gemini API key set!")

In [None]:
# Cell 4: Core configuration

import re
import os
import gc
import time
import logging
import tempfile
from datetime import date

import requests

# Timestamped INFO-level logging; the pipeline classes below all log through
# this one named logger.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("lecture_processor")

# ── Configuration ──────────────────────────────────────────────────────────────

# Hugging Face model ids for translation and speech recognition.
NLLB_MODEL = "facebook/nllb-200-distilled-600M"
WHISPER_HF_MODEL = "projecte-aina/whisper-large-v3-ca-3catparla"

# Display name -> NLLB language code. Insertion order matters: it defines the
# order of TARGET_LANGUAGES below.
LANGUAGE_CODES = dict(
    Catalan="cat_Latn",
    Spanish="spa_Latn",
    English="eng_Latn",
    Portuguese="por_Latn",
    Italian="ita_Latn",
)

# Every configured language except the source language.
TARGET_LANGUAGES = [name for name in LANGUAGE_CODES if name != "Catalan"]

# Regex fragments (not escaped literals) for spoken fillers; they are joined
# into a single alternation by _build_filler_pattern.
CATALAN_FILLERS = [
    r"per dir-ho d'alguna manera",
    r"diguem-ne",
    r"a veure",
    r"o sigui",
    r"vull dir",
    r"\u00e9s a dir",
    r"llavors",
    r"bueno",
    r"doncs",
    r"saps\?",
    r"vale",
    r"clar",
    r"oi\?",
    r"no\?",
    r"b\u00e9",
    # Hesitation sounds; `+` lets the final letter repeat ("ehhh", "mmm").
    r"ehm+",
    r"eh+",
    r"mm+",
    r"um+",
    r"ah+",
]

# Prompt for the LLM cleanup pass. `{text}` is filled with one chunk of the
# regex-cleaned transcript (see TextCleaner.clean).
CLEANUP_PROMPT = """You are a text editor. The following is a transcription of a university \
lecture in Catalan. Clean it up by:
1. Organizing into logical paragraphs
2. Fixing any obvious transcription errors
3. Removing remaining verbal fillers or repetitions
4. Do NOT change the language or the meaning
5. Do NOT add any commentary or explanations
6. Return ONLY the cleaned text

Transcription:
{text}"""

# Prompt for the final summary. The "## Main Topics" / "## Detailed Summary" /
# "## Key Terms" headers requested here are the ones Summarizer searches for
# when it parses the response. Placeholders: `{language}`, `{text}`.
SUMMARY_PROMPT = """You are an academic assistant. Summarize the following university lecture \
transcript. The lecture is in {language}. Provide your summary in {language}.

Format your response as:
## Main Topics
- [bullet points of 5-10 key topics covered]

## Detailed Summary
[2-3 paragraphs summarizing the lecture content]

## Key Terms
- [list of important technical terms or concepts mentioned]

Transcript:
{text}"""

# Prompt for summarizing a single chunk; used by Summarizer._map_reduce before
# the partial summaries are combined with SUMMARY_PROMPT.
CHUNK_SUMMARY_PROMPT = """You are an academic assistant. Summarize this section of a university \
lecture transcript. The lecture is in {language}. Provide a concise summary in {language} \
capturing the key points, concepts, and any important terminology.

Section:
{text}"""

# Lower-case file extensions (including the dot) accepted by LectureProcessor.process.
SUPPORTED_AUDIO_FORMATS = [".m4a", ".mp3", ".wav", ".ogg", ".webm", ".flac"]
# Token cap passed to the NLLB tokenizer (with truncation) and to generate().
NLLB_MAX_LENGTH = 512
# Token budget per translation batch; lower than NLLB_MAX_LENGTH.
NLLB_BATCH_MAX_TOKENS = 400
# Maximum words per chunk sent to the LLM; also the threshold above which the
# summarizer switches to map-reduce.
LLM_CHUNK_MAX_WORDS = 3000
# Bullets per slide before a "(cont.)" slide is started.
SLIDE_MAX_BULLETS = 6

print("Configuration loaded!")

In [None]:
# Cell 5: Transcriber (GPU path using transformers pipeline)

class Transcriber:
    """Catalan speech-to-text wrapper around a lazily loaded Whisper pipeline.

    The checkpoint named by ``WHISPER_HF_MODEL`` is loaded on the first call to
    :meth:`transcribe` and reused for later calls.
    """

    def __init__(self):
        # Built on first use so that constructing the object stays cheap.
        self._pipe = None

    def _load_model(self):
        """Load the Whisper checkpoint and build the ASR pipeline.

        Uses fp16 on ``cuda:0`` when CUDA is available and falls back to fp32
        on the CPU otherwise (the same availability check the Translator
        uses), so a runtime without a GPU transcribes slowly instead of
        crashing on ``.to("cuda:0")``.
        """
        import torch
        from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

        use_cuda = torch.cuda.is_available()
        device = "cuda:0" if use_cuda else "cpu"
        dtype = torch.float16 if use_cuda else torch.float32
        if not use_cuda:
            logger.warning("CUDA not available; running Whisper on CPU (this will be slow)")

        logger.info("Loading Whisper model: %s", WHISPER_HF_MODEL)
        processor = AutoProcessor.from_pretrained(WHISPER_HF_MODEL)
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            WHISPER_HF_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True
        ).to(device)

        self._pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=dtype,
            device=device,
            chunk_length_s=30,
            batch_size=16,
            return_timestamps=True,
        )
        logger.info("Whisper model loaded on %s", device)

    def transcribe(self, audio_path, progress_callback=None):
        """Transcribe ``audio_path`` as Catalan speech.

        Args:
            audio_path: Path handed to the ASR pipeline.
            progress_callback: Optional callable invoked as
                ``progress_callback(fraction, desc=...)``.

        Returns:
            A dict with ``text`` (full transcript), ``segments`` (list of
            ``{"start", "end", "text"}`` dicts), ``language`` (always ``"ca"``)
            and ``duration_seconds`` (wall-clock time spent transcribing, not
            the length of the audio).
        """
        if self._pipe is None:
            if progress_callback:
                progress_callback(0.05, desc="Loading Whisper model...")
            self._load_model()

        if progress_callback:
            progress_callback(0.10, desc="Transcribing audio...")

        t0 = time.time()
        output = self._pipe(
            audio_path,
            generate_kwargs={"language": "ca", "task": "transcribe"},
        )

        segments = []
        for chunk in output.get("chunks", []):
            # The pipeline may omit a boundary (None) or the whole timestamp.
            ts = chunk.get("timestamp") or (None, None)
            start = ts[0] if ts[0] is not None else 0.0
            # A missing end falls back to the start so that end >= start
            # always holds (previously it became 0.0, i.e. before the start).
            end = ts[1] if ts[1] is not None else start
            segments.append({
                "start": start,
                "end": end,
                "text": chunk["text"].strip(),
            })

        if progress_callback:
            progress_callback(0.50, desc="Transcription complete")

        return {
            "text": output["text"].strip(),
            "segments": segments,
            "language": "ca",
            "duration_seconds": time.time() - t0,
        }

print("Transcriber ready")

In [None]:
# Cell 6: Text Cleaner (regex + optional LLM)

def _build_filler_pattern(fillers):
    sorted_fillers = sorted(fillers, key=len, reverse=True)
    pattern = r"\b(?:" + "|".join(sorted_fillers) + r")\b"
    return re.compile(pattern, re.IGNORECASE | re.UNICODE)

class TextCleaner:
    """Removes spoken fillers with a regex and optionally restructures with Gemini."""

    def __init__(self):
        # Compiled once from CATALAN_FILLERS; reused for every transcript.
        self._filler_pattern = _build_filler_pattern(CATALAN_FILLERS)

    def _has_gemini(self):
        """Return True when a non-empty GEMINI_API_KEY is set in the environment."""
        return bool(os.environ.get("GEMINI_API_KEY"))

    def _call_gemini(self, prompt):
        """Send ``prompt`` to Gemini and return the stripped response text.

        Raises:
            RuntimeError: If the response carries no text. Raising (instead of
                returning an empty string) lets :meth:`clean` fall back to the
                regex result rather than silently dropping a chunk.
        """
        from google import genai
        client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
        response = client.models.generate_content(
            model="gemini-2.0-flash", contents=prompt
        )
        text = (response.text or "").strip()
        if not text:
            raise RuntimeError("Gemini returned an empty response")
        return text

    def regex_clean(self, text):
        """Strip fillers from ``text`` and tidy spacing, punctuation and capitals.

        Returns an empty string for empty/None input.
        """
        if not text:
            return ""
        cleaned = self._filler_pattern.sub("", text)
        cleaned = re.sub(r"[ \t]+", " ", cleaned)
        # Repair punctuation orphaned by a removed filler. Each pattern needs
        # the space the removal leaves behind, so text such as "etc., i" that
        # was already in the transcript is left alone.
        cleaned = re.sub(r"([.!?]) [,;:]+", r"\1", cleaned)    # "Hola. , avui" -> "Hola. avui"
        cleaned = re.sub(r",(?: ,)+", ",", cleaned)            # "i, , això"    -> "i, això"
        cleaned = re.sub(r", ([.!?])(?!\.)", r"\1", cleaned)   # "està, ."      -> "està."
        cleaned = re.sub(r" ([.,;:!?])", r"\1", cleaned)
        cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
        # A filler removed at the very start leaves ", ..." behind.
        cleaned = re.sub(r"^[\s,;:]+", "", cleaned).strip()
        # Fix capitalization
        cleaned = re.sub(
            r"([.!?]\s+)([a-z\u00e1\u00e0\u00e9\u00e8\u00ed\u00ef\u00f3\u00f2\u00fa\u00fc\u00e7])",
            lambda m: m.group(1) + m.group(2).upper(), cleaned
        )
        if cleaned and cleaned[0].islower():
            cleaned = cleaned[0].upper() + cleaned[1:]
        return cleaned

    def clean(self, text, progress_callback=None):
        """Run the regex pass and, when a Gemini key is set, the LLM pass.

        Args:
            text: Raw transcript.
            progress_callback: Optional callable invoked as
                ``progress_callback(fraction, desc=...)``.

        Returns:
            A dict with ``regex_cleaned``, ``llm_cleaned`` (None when Gemini is
            not configured or failed) and ``best`` (the LLM result when
            non-empty, otherwise the regex result).
        """
        if progress_callback:
            progress_callback(0.50, desc="Removing filler words...")
        regex_cleaned = self.regex_clean(text)

        llm_cleaned = None
        if self._has_gemini():
            if progress_callback:
                progress_callback(0.52, desc="Restructuring text with Gemini...")
            try:
                chunks = _chunk_text(regex_cleaned)
                parts = []
                for i, chunk in enumerate(chunks):
                    if progress_callback:
                        frac = 0.52 + 0.08 * ((i + 1) / len(chunks))
                        progress_callback(frac, desc=f"Restructuring chunk {i+1}/{len(chunks)}...")
                    prompt = CLEANUP_PROMPT.format(text=chunk)
                    parts.append(self._call_gemini(prompt))
                llm_cleaned = "\n\n".join(parts)
            except Exception as e:
                # Best effort: any LLM failure falls back to the regex result.
                logger.warning("Gemini cleanup failed: %s", e)

        return {
            "regex_cleaned": regex_cleaned,
            "llm_cleaned": llm_cleaned,
            "best": llm_cleaned if llm_cleaned else regex_cleaned,
        }

def _chunk_text(text, max_words=LLM_CHUNK_MAX_WORDS):
    """Split ``text`` into chunks of at most ``max_words`` words.

    Chunks are built from whole sentences (split after ``.``, ``!`` or ``?``).
    A single sentence longer than ``max_words`` (for example a transcript with
    little punctuation) is cut on word boundaries, so no chunk exceeds the
    budget. Blank sentences are skipped, which means empty or whitespace-only
    text yields ``[]`` rather than a chunk holding an empty string.

    Args:
        text: Text to split.
        max_words: Word budget per chunk. Values <= 0 disable the hard split of
            long sentences.

    Returns:
        List of chunk strings in original order.
    """
    chunks, current_chunk, current_count = [], [], 0

    def flush():
        nonlocal current_chunk, current_count
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        current_chunk, current_count = [], 0

    for sentence in re.split(r"(?<=[.!?])\s+", text):
        words = sentence.split()
        if not words:
            continue
        if max_words > 0 and len(words) > max_words:
            # Oversized sentence: emit what is pending, then cut on words.
            flush()
            for start in range(0, len(words), max_words):
                chunks.append(" ".join(words[start:start + max_words]))
            continue
        if current_count + len(words) > max_words:
            flush()
        current_chunk.append(sentence)
        current_count += len(words)
    flush()
    return chunks

print("TextCleaner ready")

In [None]:
# Cell 7: Translator (NLLB-200)

class Translator:
    """Translates text with the NLLB model named by ``NLLB_MODEL``, loaded lazily."""

    def __init__(self):
        # Both are populated by _load_model on first use.
        self._model = None
        self._tokenizer = None

    def _load_model(self):
        """Load tokenizer and model; use fp16 on the GPU when CUDA is available."""
        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
        import torch

        logger.info("Loading NLLB-200 translation model...")
        self._tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL)
        self._model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_MODEL)
        if torch.cuda.is_available():
            self._model = self._model.half().to("cuda")
        logger.info("Translation model loaded")

    def _ensure_loaded(self):
        """Load the model on first use; later calls are no-ops."""
        if self._model is None:
            self._load_model()

    def _count_tokens(self, text):
        """Return the number of tokenizer tokens in ``text`` (no special tokens)."""
        return len(self._tokenizer.encode(text, add_special_tokens=False))

    def _split_long_sentence(self, sentence):
        """Cut one over-budget sentence on whitespace into ``(piece, tokens)`` pairs.

        Each piece stays within ``NLLB_BATCH_MAX_TOKENS`` unless a single word
        alone exceeds the budget.
        """
        pieces, words, used = [], [], 0
        for word in sentence.split():
            cost = self._count_tokens(word)
            if words and used + cost > NLLB_BATCH_MAX_TOKENS:
                pieces.append((" ".join(words), used))
                words, used = [], 0
            words.append(word)
            used += cost
        if words:
            pieces.append((" ".join(words), used))
        return pieces

    def _split_into_batches(self, text):
        """Group sentences into batches of at most ``NLLB_BATCH_MAX_TOKENS`` tokens.

        A sentence that alone exceeds the budget is cut on word boundaries
        first. Previously it was passed through whole and then silently
        truncated at ``NLLB_MAX_LENGTH`` by the tokenizer, dropping the rest of
        the sentence from the translation.
        """
        sentences = re.split(r"(?<=[.!?;:])\s+", text)
        batches, current_batch, current_tokens = [], [], 0
        for sentence in sentences:
            if not sentence.strip():
                continue
            tokens = self._count_tokens(sentence)
            if tokens > NLLB_BATCH_MAX_TOKENS:
                units = self._split_long_sentence(sentence)
            else:
                units = [(sentence, tokens)]
            for piece, piece_tokens in units:
                if current_tokens + piece_tokens > NLLB_BATCH_MAX_TOKENS and current_batch:
                    batches.append(" ".join(current_batch))
                    current_batch, current_tokens = [], 0
                current_batch.append(piece)
                current_tokens += piece_tokens
        if current_batch:
            batches.append(" ".join(current_batch))
        return batches

    def translate_text(self, text, source_lang, target_lang):
        """Translate ``text`` between two languages named in ``LANGUAGE_CODES``.

        Args:
            text: Source text; empty or whitespace-only input returns ``""``
                without loading the model.
            source_lang: Key of ``LANGUAGE_CODES`` for the input language.
            target_lang: Key of ``LANGUAGE_CODES`` for the output language.

        Returns:
            The translated batches joined with single spaces.

        Raises:
            KeyError: If either language name is not in ``LANGUAGE_CODES``.
        """
        if not text or not text.strip():
            return ""
        self._ensure_loaded()

        src_code = LANGUAGE_CODES[source_lang]
        tgt_code = LANGUAGE_CODES[target_lang]
        tgt_token_id = self._tokenizer.convert_tokens_to_ids(tgt_code)

        self._tokenizer.src_lang = src_code
        batches = self._split_into_batches(text)
        translated_parts = []

        device = "cuda" if next(self._model.parameters()).is_cuda else "cpu"
        for batch in batches:
            inputs = self._tokenizer(
                batch, return_tensors="pt", padding=True,
                truncation=True, max_length=NLLB_MAX_LENGTH
            )
            if device == "cuda":
                inputs = {k: v.to("cuda") for k, v in inputs.items()}
            outputs = self._model.generate(
                **inputs, forced_bos_token_id=tgt_token_id, max_length=NLLB_MAX_LENGTH
            )
            decoded = self._tokenizer.batch_decode(outputs, skip_special_tokens=True)
            translated_parts.extend(decoded)

        return " ".join(translated_parts)

    def translate_to_languages(self, text, target_languages, progress_callback=None):
        """Translate Catalan ``text`` into each language of ``target_languages``.

        A failure for one language is logged and stored as a bracketed
        "[Translation to ... failed: ...]" string so the others still run.

        Returns:
            Dict mapping language name to translation (or failure string).
        """
        translations = {}
        for i, lang in enumerate(target_languages):
            if progress_callback:
                frac = 0.60 + 0.20 * (i / len(target_languages))
                progress_callback(frac, desc=f"Translating to {lang}...")
            try:
                translations[lang] = self.translate_text(text, "Catalan", lang)
            except Exception as e:
                logger.error("Translation to %s failed: %s", lang, e)
                translations[lang] = f"[Translation to {lang} failed: {e}]"
        if progress_callback:
            progress_callback(0.80, desc="Translation complete")
        return translations

print("Translator ready")

In [None]:
# Cell 8: Summarizer

class Summarizer:
    """Asks Gemini for a Markdown lecture summary and parses it into parts.

    When no ``GEMINI_API_KEY`` is set in the environment, :meth:`summarize`
    returns an empty result instead of calling the API.
    """

    def __init__(self):
        """Nothing to initialise; the Gemini client is created per request."""

    @staticmethod
    def _empty_result():
        """Return the result dict used when no summary could be produced."""
        return {"raw_summary": None, "main_topics": [], "detailed_summary": "",
                "key_terms": [], "sections": []}

    def _has_gemini(self):
        """Return True when a non-empty GEMINI_API_KEY is in the environment."""
        key = os.environ.get("GEMINI_API_KEY")
        return bool(key)

    def _call_gemini(self, prompt):
        """Send ``prompt`` to Gemini and return the stripped response text."""
        from google import genai

        api_key = os.environ["GEMINI_API_KEY"]
        reply = genai.Client(api_key=api_key).models.generate_content(
            model="gemini-2.0-flash", contents=prompt
        )
        return reply.text.strip()

    def summarize(self, text, language="Catalan", progress_callback=None):
        """Summarize ``text`` and return the raw Markdown plus its parsed parts.

        Args:
            text: Transcript to summarize.
            language: Language name inserted into the prompts.
            progress_callback: Optional callable invoked as
                ``progress_callback(fraction, desc=...)``.

        Returns:
            Dict with ``raw_summary``, ``main_topics``, ``detailed_summary``,
            ``key_terms`` and ``sections``. All parts are empty (and
            ``raw_summary`` is None) when Gemini is not configured or fails.
        """
        if progress_callback:
            progress_callback(0.80, desc="Generating summary...")

        if not self._has_gemini():
            logger.info("No Gemini key, skipping summarization")
            return self._empty_result()

        # Long transcripts are summarized chunk by chunk, then combined.
        too_long = len(text.split()) > LLM_CHUNK_MAX_WORDS
        try:
            if too_long:
                raw = self._map_reduce(text, language, progress_callback)
            else:
                raw = self._call_gemini(SUMMARY_PROMPT.format(language=language, text=text))
        except Exception as e:
            logger.warning("Summarization failed: %s", e)
            return self._empty_result()

        if progress_callback:
            progress_callback(0.90, desc="Summary complete")

        parsed = {"raw_summary": raw}
        parsed["main_topics"] = self._extract_list(raw, "Main Topics")
        parsed["detailed_summary"] = self._extract_section(raw, "Detailed Summary")
        parsed["key_terms"] = self._extract_list(raw, "Key Terms")
        parsed["sections"] = self._parse_sections(raw)
        return parsed

    def _map_reduce(self, text, language, progress_callback=None):
        """Summarize each chunk of ``text``, then summarize the joined partials."""
        chunks = _chunk_text(text)
        total = len(chunks)
        partials = []
        for position, chunk in enumerate(chunks, start=1):
            if progress_callback:
                progress_callback(
                    0.82 + 0.06 * (position / total),
                    desc=f"Summarizing section {position}/{total}...",
                )
            partials.append(
                self._call_gemini(CHUNK_SUMMARY_PROMPT.format(language=language, text=chunk))
            )
        if progress_callback:
            progress_callback(0.88, desc="Generating final summary...")
        merged = "\n\n".join(partials)
        return self._call_gemini(SUMMARY_PROMPT.format(language=language, text=merged))

    def _parse_sections(self, text):
        """Split Markdown into ``{"title", "bullets"}`` dicts, one per "## " header.

        Lines before the first header are ignored. Inside a section, "- " and
        "* " prefixes are removed; any other non-empty line is kept as a bullet.
        """
        sections = []
        current = None
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if line.startswith("## "):
                current = {"title": line[3:].strip(), "bullets": []}
                sections.append(current)
                continue
            if current is None or not line:
                continue
            if line.startswith(("- ", "* ")):
                current["bullets"].append(line[2:].strip())
            else:
                current["bullets"].append(line)
        return sections

    def _section_lines(self, text, header):
        """Yield the stripped lines under the "## " header containing ``header``.

        Matching is case-insensitive. Iteration stops at the next "## " header
        that does not itself contain ``header``.
        """
        wanted = header.lower()
        inside = False
        for raw_line in text.split("\n"):
            stripped = raw_line.strip()
            if stripped.startswith("## "):
                if wanted in stripped.lower():
                    inside = True
                elif inside:
                    return
            elif inside:
                yield stripped

    def _extract_list(self, text, header):
        """Return the "- " / "* " bullet items under the given header."""
        return [
            entry[2:].strip()
            for entry in self._section_lines(text, header)
            if entry.startswith(("- ", "* "))
        ]

    def _extract_section(self, text, header):
        """Return the non-empty lines under the given header, joined by blank lines."""
        return "\n\n".join(entry for entry in self._section_lines(text, header) if entry)

print("Summarizer ready")

In [None]:
# Cell 9: Slide Generator

from pptx import Presentation as PptxPresentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN

# Slide palette (RGB components).
COLOR_TITLE = RGBColor(0x1A, 0x47, 0x8A)  # dark blue; applied to slide titles
COLOR_BODY = RGBColor(0x33, 0x33, 0x33)  # dark gray; applied to bullet text
COLOR_LIGHT = RGBColor(0x85, 0x92, 0x9E)  # gray-blue; not referenced by the code shown here

class SlideGenerator:
    """Builds a .pptx deck from a summary dict (the shape returned by Summarizer.summarize)."""

    def generate(self, summary_data, title="Lecture Summary", output_path=None):
        """Create the presentation and save it.

        Slide order: title, "Overview" (``main_topics``), one slide group per
        section, "Key Terms & Concepts" (``key_terms``), end slide.

        Args:
            summary_data: Dict with optional ``main_topics``, ``sections`` and
                ``key_terms`` entries.
            title: Text of the title slide.
            output_path: Destination file; defaults to a dated file under /tmp.

        Returns:
            The path the presentation was saved to.
        """
        prs = PptxPresentation()
        prs.slide_width = Inches(13.333)
        prs.slide_height = Inches(7.5)

        # Title slide
        slide = prs.slides.add_slide(prs.slide_layouts[0])
        slide.shapes.title.text = title
        for p in slide.shapes.title.text_frame.paragraphs:
            p.font.size, p.font.color.rgb, p.font.bold = Pt(36), COLOR_TITLE, True
        if len(slide.placeholders) > 1:
            slide.placeholders[1].text = f"Generated {date.today().isoformat()}"

        # Overview
        topics = summary_data.get("main_topics", [])
        if topics:
            self._add_content_slide(prs, "Overview", topics)

        # Sections (minus those already covered by Overview / Key Terms)
        for sec in self._sections_for_slides(summary_data):
            self._add_content_slide(prs, sec["title"], sec["bullets"])

        # Key terms
        terms = summary_data.get("key_terms", [])
        if terms:
            self._add_content_slide(prs, "Key Terms & Concepts", terms)

        # End slide
        slide = prs.slides.add_slide(prs.slide_layouts[0])
        slide.shapes.title.text = "End of Summary"
        for p in slide.shapes.title.text_frame.paragraphs:
            p.font.size, p.font.color.rgb = Pt(36), COLOR_TITLE
            p.alignment = PP_ALIGN.CENTER

        if output_path is None:
            output_path = f"/tmp/lecture_slides_{date.today().isoformat()}.pptx"
        prs.save(output_path)
        return output_path

    @staticmethod
    def _sections_for_slides(summary_data):
        """Return the sections that should get their own slides.

        A section needs a title and at least one bullet. Sections whose bullets
        equal ``main_topics`` or ``key_terms`` are skipped: the summarizer's
        ``sections`` list is parsed from the same Markdown as those two lists,
        so keeping them would repeat the Overview and Key Terms slides.
        """
        topics = summary_data.get("main_topics") or []
        terms = summary_data.get("key_terms") or []
        selected = []
        for sec in summary_data.get("sections") or []:
            bullets = sec.get("bullets")
            if not (sec.get("title") and bullets):
                continue
            if bullets == topics or bullets == terms:
                continue
            selected.append(sec)
        return selected

    def _add_content_slide(self, prs, title_text, bullets):
        """Add bullet slides, starting a "(cont.)" slide every SLIDE_MAX_BULLETS bullets."""
        for idx in range(0, len(bullets), SLIDE_MAX_BULLETS):
            page = bullets[idx:idx + SLIDE_MAX_BULLETS]
            st = f"{title_text} (cont.)" if idx > 0 else title_text
            slide = prs.slides.add_slide(prs.slide_layouts[1])
            slide.shapes.title.text = st
            for p in slide.shapes.title.text_frame.paragraphs:
                p.font.size, p.font.color.rgb, p.font.bold = Pt(28), COLOR_TITLE, True
            if len(slide.placeholders) > 1:
                tf = slide.placeholders[1].text_frame
                tf.clear()
                for i, b in enumerate(page):
                    # clear() leaves one empty paragraph; reuse it for the first bullet.
                    p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
                    p.text = b
                    p.font.size, p.font.color.rgb = Pt(18), COLOR_BODY
                    p.space_after = Pt(8)

print("SlideGenerator ready")

In [None]:
# Cell 10: Pipeline Orchestrator

import shutil
import zipfile

TOTAL_STEPS = 5

def _progress_bar(fraction, width=30):
    """Render a text-based progress bar like [████████░░░░░░░░░░░░] 40%"""
    fraction = max(0.0, min(1.0, fraction))
    filled = int(width * fraction)
    bar = "\u2588" * filled + "\u2591" * (width - filled)
    return f"[{bar}] {fraction * 100:.0f}%"

def _step(n, title, detail="", progress=None):
    """Format a step status string with optional progress bar."""
    header = f"Step {n}/{TOTAL_STEPS} \u2014 {title}"
    if progress is not None:
        header += f"  {_progress_bar(progress)}"
    if detail:
        return f"{header}\n{detail}"
    return header

class LectureProcessor:
    """Runs the whole pipeline: transcribe, clean, translate, summarize, make slides."""

    def __init__(self):
        self._transcriber = Transcriber()
        self._cleaner = TextCleaner()
        self._translator = Translator()
        self._summarizer = Summarizer()
        self._slides = SlideGenerator()

    @staticmethod
    def _write_text(path, text):
        """Write ``text`` to ``path`` as UTF-8 and close the file before returning."""
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(text)

    def process(self, audio_path, target_languages):
        """Generator that yields (status_message, results_dict) tuples.

        The same ``results`` dict is yielded each time and filled in as steps
        finish. An unsupported extension or a transcription failure ends the
        run early; failures in later steps are recorded in
        ``results["errors"]`` and the run continues.

        Output files go to a fresh temporary directory, which is not removed
        here because the returned paths are handed to the UI for download.

        Args:
            audio_path: Path of the uploaded audio file.
            target_languages: Language names to translate into.
        """
        results = {
            "transcript_raw": None, "transcript_clean": None,
            "translations": {}, "summary": None,
            "summaries": {}, "summaries_data": {},
            "all_files": {}, "errors": [], "timings": {},
        }

        # Validate before touching the filesystem, so an unsupported upload is
        # rejected without creating a temp dir or copying the file.
        ext = os.path.splitext(audio_path)[1].lower()
        if ext not in SUPPORTED_AUDIO_FORMATS:
            results["errors"].append(f"Unsupported format: {ext}")
            yield (f"Error: Unsupported format {ext}", results)
            return

        output_dir = tempfile.mkdtemp(prefix="lecture_")

        # Copy audio so Gradio and transcriber don't lock the same file
        audio_copy = os.path.join(output_dir, os.path.basename(audio_path))
        shutil.copy2(audio_path, audio_copy)
        audio_path = audio_copy

        # Step 1: Transcribe
        yield (_step(1, "Transcribing", "Loading model... (this takes up to 30s)"), results)
        t0 = time.time()
        try:
            tr = self._transcriber.transcribe(audio_path)
            results["transcript_raw"] = tr["text"]
            results["timings"]["transcription"] = time.time() - t0
            path = os.path.join(output_dir, "transcript_raw.txt")
            self._write_text(path, results["transcript_raw"])
            results["all_files"]["transcript_raw.txt"] = path
        except Exception as e:
            # Nothing downstream can run without a transcript.
            results["errors"].append(f"Transcription failed: {e}")
            yield (f"Transcription failed: {e}", results)
            return

        # Step 2: Clean text
        yield (_step(2, "Cleaning text", "Removing filler words..."), results)
        t0 = time.time()
        try:
            cr = self._cleaner.clean(results["transcript_raw"])
            results["transcript_clean"] = cr["best"]
            results["timings"]["cleanup"] = time.time() - t0
            path = os.path.join(output_dir, "transcript_clean.txt")
            self._write_text(path, results["transcript_clean"])
            results["all_files"]["transcript_clean.txt"] = path
        except Exception as e:
            # Fall back to the raw transcript so later steps still have input.
            results["errors"].append(f"Cleanup failed: {e}")
            results["transcript_clean"] = results["transcript_raw"]

        # Step 3: Translate
        yield (_step(3, "Translating", "Loading model... (this takes up to 30s)"), results)
        t0 = time.time()
        text_to_translate = results["transcript_clean"] or results["transcript_raw"]
        try:
            self._translator._ensure_loaded()
            num_langs = len(target_languages)
            for i, lang in enumerate(target_languages):
                frac = i / num_langs
                yield (_step(3, "Translating", f"{lang}...", progress=frac), results)
                try:
                    txt = self._translator.translate_text(
                        text_to_translate, "Catalan", lang
                    )
                    results["translations"][lang] = txt
                    if not txt.startswith("[Translation"):
                        fn = f"translation_{lang.lower()}.txt"
                        path = os.path.join(output_dir, fn)
                        self._write_text(path, txt)
                        results["all_files"][fn] = path
                except Exception as e:
                    logger.error("Translation to %s failed: %s", lang, e)
                    # The bracketed marker is what steps 4 and 5 use to skip this language.
                    results["translations"][lang] = f"[Translation to {lang} failed: {e}]"
                    # Surface the failure in the warnings list as well, as the
                    # summarization and slide steps do for their failures.
                    results["errors"].append(f"Translation ({lang}) failed: {e}")
            results["timings"]["translation"] = time.time() - t0
        except Exception as e:
            results["errors"].append(f"Translation failed: {e}")

        # Step 4: Summarize (per translated language)
        t0 = time.time()
        num_summary_langs = len(target_languages)
        for lang_idx, lang in enumerate(target_languages):
            translated_text = results["translations"].get(lang, "")
            if not translated_text or translated_text.startswith("[Translation"):
                continue
            frac = lang_idx / num_summary_langs
            yield (_step(4, "Summarizing", f"{lang}...", progress=frac), results)
            try:
                sd = self._summarizer.summarize(translated_text, lang)
                raw = sd.get("raw_summary")
                results["summaries"][lang] = raw
                results["summaries_data"][lang] = sd
                if raw:
                    fn = f"summary_{lang.lower()}.md"
                    path = os.path.join(output_dir, fn)
                    self._write_text(path, raw)
                    results["all_files"][fn] = path
            except Exception as e:
                logger.error("Summarization for %s failed: %s", lang, e)
                results["errors"].append(f"Summarization ({lang}) failed: {e}")
        results["timings"]["summarization"] = time.time() - t0

        # Build combined summary for UI display
        summary_parts = []
        for lang in target_languages:
            raw = results["summaries"].get(lang)
            if raw:
                summary_parts.append(f"## {lang}\n\n{raw}")
        results["summary"] = "\n\n---\n\n".join(summary_parts) if summary_parts else None

        # Step 5: Generate slides (per language)
        yield (_step(5, "Creating slides"), results)
        t0 = time.time()
        slides_created = 0
        audio_name = os.path.splitext(os.path.basename(audio_path))[0]
        for lang in target_languages:
            sd = results.get("summaries_data", {}).get(lang, {})
            if not (sd.get("main_topics") or sd.get("sections")):
                continue
            try:
                slides_path = os.path.join(output_dir, f"lecture_slides_{lang.lower()}.pptx")
                self._slides.generate(sd, title=f"{audio_name} ({lang})", output_path=slides_path)
                results["all_files"][f"lecture_slides_{lang.lower()}.pptx"] = slides_path
                slides_created += 1
            except Exception as e:
                logger.error("Slide generation for %s failed: %s", lang, e)
                results["errors"].append(f"Slides ({lang}) failed: {e}")
        if slides_created == 0:
            results["errors"].append("Slides skipped: no summary data available")
        results["timings"]["slides"] = time.time() - t0

        # Create ZIP of all output files
        if results["all_files"]:
            try:
                zip_path = os.path.join(output_dir, "lecture_all_files.zip")
                with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
                    for filename, filepath in results["all_files"].items():
                        zf.write(filepath, filename)
                results["zip_path"] = zip_path
            except Exception as e:
                logger.error("ZIP creation failed: %s", e)

        timing_parts = [f"{k}: {v:.1f}s" for k, v in results["timings"].items()]
        status = "Done! " + " | ".join(timing_parts) if timing_parts else "Done!"
        yield (status, results)

print("LectureProcessor ready")

In [None]:
# Cell 11: Gradio UI and Launch

import gradio as gr

# One pipeline instance shared by every button click.
processor = LectureProcessor()

with gr.Blocks(title="Catalan Lecture Processor") as app:

    gr.Markdown(
        "# Catalan Lecture Processor\n"
        "Upload a lecture recording in Catalan. Get transcription, "
        "translation, summary, and PowerPoint slides."
    )

    # Input area: audio upload, target-language selection and the run button.
    with gr.Group():
        gr.Markdown("### Upload Audio")
        audio_input = gr.Audio(
            label="Lecture audio (m4a, mp3, wav, ogg, webm, flac)",
            type="filepath", sources=["upload"],
        )
        target_langs = gr.CheckboxGroup(
            choices=TARGET_LANGUAGES, value=["Spanish", "English"],
            label="Translate to:",
        )
        process_btn = gr.Button("Process Lecture", variant="primary", size="lg")

    # Status above tabs for visibility
    status_text = gr.Textbox(label="Status", interactive=False, lines=3)
    # Hidden until the pipeline reports at least one error (see build_output).
    errors_text = gr.Textbox(label="Warnings", interactive=False, lines=2, visible=False)

    with gr.Tabs():
        with gr.Tab("Transcript"):
            transcript_raw = gr.Textbox(label="Raw Transcription (Catalan)", lines=12, max_lines=30)
            transcript_clean = gr.Textbox(label="Cleaned Transcription", lines=12, max_lines=30)

        with gr.Tab("Translations"):
            # One textbox per target language, created in TARGET_LANGUAGES order;
            # only the default checkbox selection starts visible.
            trans_boxes = {}
            for lang in TARGET_LANGUAGES:
                trans_boxes[lang] = gr.Textbox(
                    label=f"{lang} Translation", lines=10,
                    visible=(lang in ["Spanish", "English"]),
                )

        with gr.Tab("Summary"):
            summary_output = gr.Markdown(label="Lecture Summary")

        with gr.Tab("Downloads"):
            download_zip = gr.File(
                label="Download All (ZIP)",
                file_count="single",
            )
            gr.Markdown("Or download individual files:")
            download_files = gr.File(label="Individual Files", file_count="multiple")

    def update_visibility(languages):
        """Return one visibility update per language, in TARGET_LANGUAGES order."""
        return [gr.update(visible=(l in languages)) for l in TARGET_LANGUAGES]

    # The outputs list follows trans_boxes' insertion order, which matches the
    # TARGET_LANGUAGES order used by update_visibility.
    target_langs.change(
        update_visibility, inputs=[target_langs],
        outputs=list(trans_boxes.values()),
    )

    def process_lecture(audio, languages):
        """Generator that yields UI updates from the pipeline."""
        if audio is None:
            raise gr.Error("Please upload an audio file first.")
        if not languages:
            raise gr.Error("Please select at least one target language.")

        def build_output(status, results):
            """Map one (status, results) pair onto component values, in all_outputs order."""
            raw = results.get("transcript_raw") or ""
            clean = results.get("transcript_clean") or ""
            translations = results.get("translations", {})
            # Languages that were not requested get an empty textbox value.
            trans_outputs = [translations.get(l, "") for l in TARGET_LANGUAGES]
            summary = results.get("summary") or ""
            zip_path = results.get("zip_path")
            files_list = list(results.get("all_files", {}).values()) or None
            errors = results.get("errors", [])
            errors_str = "\n".join(errors) if errors else ""
            return (
                status,
                gr.update(value=errors_str, visible=bool(errors)),
                raw, clean,
                *trans_outputs,
                summary, zip_path, files_list,
            )

        # Each pipeline yield refreshes every output component.
        for status, results in processor.process(audio, languages):
            yield build_output(status, results)

    # Order must match the tuple returned by build_output.
    all_outputs = [
        status_text, errors_text,
        transcript_raw, transcript_clean,
        *list(trans_boxes.values()),
        summary_output, download_zip, download_files,
    ]

    process_btn.click(
        process_lecture,
        inputs=[audio_input, target_langs],
        outputs=all_outputs,
    )

# Launch with public link for phone access
# NOTE(review): `theme` and `css` are passed to launch() here; some Gradio
# releases accept them only on gr.Blocks(...) - confirm against the installed
# version, since Cell 2 installs gradio unpinned.
app.launch(
    share=True,
    debug=True,
    theme=gr.themes.Soft(),
    css=".gradio-container { max-width: 960px !important; margin: auto; }",
)