# In [5]:
import os
from urllib.parse import urlparse, parse_qs

import gradio as gr
import requests
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled

# -------------------------
# ❇ API KEY CONFIGURATION
# -------------------------
# The key is read from the GEMINI_API_KEY environment variable so that no
# secret is stored in the source. Set it before running this cell, e.g.
#     os.environ["GEMINI_API_KEY"] = "<your key>"
# Never commit a real key; any key that has ever been pasted into shared
# source or a public notebook should be revoked and regenerated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
MODEL = "gemini-2.5-flash"                   # chosen from your account's model list (supports generateContent)
# ----------------------------------------------------------------

# ---- video id extractor ----
def extract_video_id(url: str):
    """Return the YouTube video id found in *url*, or None if there is none.

    Accepted inputs:
      * watch URLs            (youtube.com/watch?v=<id>)
      * short links           (youtu.be/<id>)
      * path-style URLs       (youtube.com/embed|v|shorts|live/<id>)
      * the same URLs without a scheme (e.g. "youtube.com/watch?v=<id>")
      * a bare video id (only letters, digits, "-" and "_")

    Surrounding whitespace is ignored. Anything else -- including URLs on
    non-YouTube hosts and YouTube URLs that carry no video id -- yields None
    so the caller can report an invalid link instead of passing garbage to
    the transcript API.
    """
    def _is_id(token):
        # Video ids are URL-safe tokens: ASCII letters, digits, '-' and '_'.
        return (
            bool(token)
            and token.isascii()
            and all(ch.isalnum() or ch in "-_" for ch in token)
        )

    if not url:
        return None
    candidate = url.strip()
    if not candidate:
        return None

    # A bare id pasted instead of a full link.
    if _is_id(candidate):
        return candidate

    # urlparse only fills in the host when a scheme is present.
    if "://" not in candidate:
        candidate = "https://" + candidate

    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # e.g. malformed IPv6 literal in the host part
        return None

    segments = [part for part in parsed.path.split("/") if part]
    video_id = None

    # Compare whole host names (not substrings) so look-alike domains such as
    # "notyoutube.com" are rejected.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        video_id = segments[0] if segments else None
    elif host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    ):
        v_values = parse_qs(parsed.query).get("v")
        if v_values:
            video_id = v_values[0]
        else:
            # Path-style links: the id is the segment after the marker.
            for marker, value in zip(segments, segments[1:]):
                if marker in ("embed", "v", "shorts", "live"):
                    video_id = value
                    break

    return video_id if _is_id(video_id) else None

# ---- transcript fetcher (instance-api compatible) ----
def get_transcript_text(video_id):
    """Fetch the transcript for *video_id* and return it as one string.

    Each fetched segment may be a dict or an object; its "text" entry or
    attribute is read, stripped and joined with single spaces.

    Returns None when no segment carries text, when transcripts are
    disabled, or when any other exception occurs (that exception's repr is
    printed for debugging).
    """
    try:
        # Lazily pull the raw text out of every segment, whatever its type.
        raw_texts = (
            item.get("text", "") if isinstance(item, dict) else getattr(item, "text", "")
            for item in YouTubeTranscriptApi().fetch(video_id)
        )
        pieces = [raw.strip() for raw in raw_texts if raw]
        if not pieces:
            return None
        return " ".join(pieces)
    except TranscriptsDisabled:
        return None
    except Exception as err:
        # The UI only sees None; the details go to the log output.
        print("transcript fetch exception:", repr(err))
        return None

# ---- Gemini call (v1 + correct payload) ----
def _extract_gemini_text(data):
    """Return the generated text found in a decoded Gemini response, or None.

    The primary shape is ``candidates[0].content.parts[*].text`` where
    ``content`` is a dict; list-of-parts and plain-string variants are also
    accepted, as are the legacy top-level ``output`` / ``text`` fallbacks.
    """
    if not isinstance(data, dict):
        return None

    def _join_parts(parts):
        # Concatenate the text of every part so multi-part answers are not cut.
        if not isinstance(parts, list):
            return None
        joined = "".join(
            part["text"]
            for part in parts
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
        return joined or None

    candidates = data.get("candidates")
    if isinstance(candidates, list) and candidates and isinstance(candidates[0], dict):
        first = candidates[0]
        for key in ("content", "contents", "output", "text"):
            value = first.get(key)
            if isinstance(value, str) and value:
                return value
            if isinstance(value, dict):
                # generateContent returns {"parts": [...], "role": "model"} here.
                value = value.get("parts")
            text = _join_parts(value)
            if text:
                return text

    # fallback: output -> content -> text
    output = data.get("output")
    if isinstance(output, list) and output and isinstance(output[0], dict):
        text = _join_parts(output[0].get("content"))
        if text:
            return text

    # last fallback: top-level text
    top_level = data.get("text")
    if isinstance(top_level, str) and top_level:
        return top_level
    return None


def call_gemini(prompt: str, timeout=90):
    """Send *prompt* to the Gemini ``generateContent`` endpoint and return text.

    Args:
        prompt: The full prompt text to send as a single content part.
        timeout: Request timeout in seconds passed to ``requests.post``.

    Returns:
        The generated text on success. On failure a human-readable error
        string is returned instead of raising (missing key, network error,
        non-2xx status, non-JSON body). If the response is valid JSON but no
        text can be located, the first 4000 characters of its ``str()`` are
        returned for debugging.
    """
    if not GEMINI_API_KEY:
        return "Gemini API Error: GEMINI_API_KEY is not set"

    endpoint = f"https://generativelanguage.googleapis.com/v1/models/{MODEL}:generateContent"
    # Send the key as a header rather than in the query string so it cannot
    # leak through URLs embedded in exception messages shown to UI users.
    headers = {"x-goog-api-key": GEMINI_API_KEY}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        r = requests.post(endpoint, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return f"Network/error calling Gemini: {e}"
    if not r.ok:
        return f"Gemini API Error ({r.status_code}): {r.text}"

    try:
        data = r.json()
    except ValueError:
        return f"Gemini API Error: response was not valid JSON: {r.text[:4000]}"

    text = _extract_gemini_text(data)
    if text is not None:
        return text
    # fallback: short JSON for debugging
    return str(data)[:4000]

# ---- main summarizer fn used by UI ----
def summarize_youtube(url: str):
    """Summarize the YouTube video at *url*; this is the function the UI calls.

    Steps: extract the video id, fetch its transcript, then ask Gemini for
    four bullet points plus a one-line TL;DR.

    Returns the text produced by ``call_gemini``, or a "❌ ..." message when
    no video id or no transcript could be obtained.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return "❌ Invalid YouTube URL"

    captions = get_transcript_text(video_id)
    if not captions:
        return "❌ Transcript not available for this video (captions disabled or not found)"

    # NOTE: the whole transcript is sent as-is; very long videos may hit
    # token limits or cost more.
    summary_request = f"Summarize the following YouTube video transcript into 4 concise bullet points and a one-line TL;DR:\n\n{captions}"
    return call_gemini(summary_request)

# ---- Minimal Gradio UI ----
# Single-textbox interface: one URL in, one multi-line summary out.
ui = gr.Interface(
    title="YouTube → Gemini Summarizer (final)",
    description="Paste a YouTube URL. Put your Gemini API key in the code (GEMINI_API_KEY).",
    fn=summarize_youtube,
    inputs=gr.Textbox(label="Paste YouTube link (single input)"),
    outputs=gr.Textbox(lines=15, label="Video Summary"),
    # Clickable sample links shown under the input box.
    examples=[
        ["https://www.youtube.com/watch?v=kYJqgW_qXWk"],
        ["https://www.youtube.com/watch?v=S0Q4J_n-j6k"],
        ["https://www.youtube.com/watch?v=rUxyKA_-grg"],
    ],
    flagging_mode="never",
)

# share=True asks Gradio for a public share link in addition to the local one.
ui.launch(share=True)

# Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
# * Running on public URL: https://d81be0d16dd179d30d.gradio.live
#
# This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


