<a href="https://colab.research.google.com/github/sindhu402/Blue_Bean_Scholarship_Chatbot/blob/main/Major_Project_111.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q gradio pytubefix youtube-transcript-api openai-whisper transformers torch python-dotenv requests

# Install ffmpeg for audio processing
!apt-get install -y ffmpeg -qq

In [None]:
# ============================================
# STABLE DIFFUSION + LoRA TRAINING FOR COLAB
# ============================================
# Train your own LoRA model with custom dataset from Google Drive

# ===== PART 1: SETUP & INSTALLATION =====

# Install required packages
!pip install -q diffusers transformers accelerate safetensors
!pip install -q peft bitsandbytes xformers
!pip install -q datasets opencv-python moviepy

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ===== PART 2: CONFIGURE YOUR TRAINING =====

# === IMPORTANT: SET YOUR PATHS HERE ===
DATASET_PATH = "/content/drive/MyDrive/Image_Dataset/final"  # Folder with your images
OUTPUT_DIR = "/content/drive/MyDrive/lora_output"  # Where to save trained LoRA
# Caption paired with every training image, and reused as the base of the test prompt
# after training. Edit it to describe your own subject.
INSTANCE_PROMPT = "A beam of white light refracting and splitting into a spectrum of colors as it passes through a triangular glass prism. Show the distinct color bands"

# Training settings
MODEL_NAME = "runwayml/stable-diffusion-v1-5"  # Base checkpoint passed to from_pretrained
RESOLUTION = 512  # Images are resized to RESOLUTION x RESOLUTION before training
TRAIN_BATCH_SIZE = 1
MAX_TRAIN_STEPS = 1000  # Number of optimizer steps; more steps means longer training
LEARNING_RATE = 1e-4
LORA_RANK = 4  # Used for both LoRA `r` and `lora_alpha`; higher = more capacity but more VRAM

# ===== PART 3: PREPARE DATASET =====

import os
from PIL import Image
import shutil

# Create working directory
WORK_DIR = "/content/training_data"
os.makedirs(WORK_DIR, exist_ok=True)

# Copy and resize images from your Drive
def prepare_dataset(source_path, dest_path, size=512):
    """Convert the images in *source_path* to square RGB PNGs in *dest_path*.

    Files are selected by extension (.jpg, .jpeg, .png, .webp, case-insensitive),
    converted to RGB, resized to ``size`` x ``size`` (aspect ratio is not
    preserved) and written as ``0000.png``, ``0001.png``, ... in sorted
    filename order. Files that cannot be processed are reported and skipped.

    Args:
        source_path: Directory containing the original images.
        dest_path: Directory to write the PNGs into; created if missing.
        size: Edge length in pixels of the square output images.

    Returns:
        The number of images written.
    """
    os.makedirs(dest_path, exist_ok=True)
    valid_ext = ('.jpg', '.jpeg', '.png', '.webp')
    count = 0

    # os.listdir order is arbitrary; sorting keeps the output numbering reproducible.
    for f in sorted(os.listdir(source_path)):
        if not f.lower().endswith(valid_ext):
            continue
        img_path = os.path.join(source_path, f)
        try:
            # The context manager closes the source file handle as soon as the
            # pixel data has been converted, instead of waiting for GC.
            with Image.open(img_path) as src:
                img = src.convert('RGB').resize((size, size), Image.LANCZOS)
            img.save(os.path.join(dest_path, f"{count:04d}.png"))
            count += 1
            print(f"Processed: {f}")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole dataset.
            print(f"Skipped {f}: {e}")

    print(f"\n‚úÖ Prepared {count} images for training")
    return count

num_images = prepare_dataset(DATASET_PATH, WORK_DIR, RESOLUTION)

# ===== PART 4: LORA TRAINING =====

import torch
from diffusers import StableDiffusionPipeline, DDPMScheduler
from diffusers.loaders import LoraLoaderMixin
from transformers import CLIPTextModel, CLIPTokenizer
from peft import LoraConfig, get_peft_model
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F

# Custom Dataset
class DreamBoothDataset(Dataset):
    """Dataset of the PNG files in a directory, all paired with one fixed prompt.

    Each item is a dict with:
        ``pixel_values``: the image after resize, random horizontal flip,
            ``ToTensor`` and ``Normalize([0.5], [0.5])``.
        ``input_ids``: the tokenized prompt, padded/truncated to 77 tokens.

    Args:
        data_dir: Directory containing the ``.png`` training images.
        prompt: Caption used for every image.
        tokenizer: Callable tokenizer; invoked with ``padding``, ``max_length``,
            ``truncation`` and ``return_tensors`` keyword arguments.
        size: Edge length the images are resized to.
    """

    def __init__(self, data_dir, prompt, tokenizer, size=512):
        self.data_dir = data_dir
        self.prompt = prompt
        self.tokenizer = tokenizer
        self.size = size
        # Sorted so that index -> file is stable between runs (os.listdir is unordered).
        self.images = sorted(f for f in os.listdir(data_dir) if f.endswith('.png'))

        self.transform = transforms.Compose([
            transforms.Resize((size, size)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5])
        ])

        # The prompt is the same for every sample, so tokenize it once here
        # rather than on every __getitem__ call.
        tokens = tokenizer(
            prompt,
            padding="max_length",
            max_length=77,
            truncation=True,
            return_tensors="pt"
        )
        self.input_ids = tokens.input_ids.squeeze()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Close the file handle promptly; convert() returns an independent image.
        with Image.open(os.path.join(self.data_dir, self.images[idx])) as src:
            img = src.convert('RGB')
        return {"pixel_values": self.transform(img), "input_ids": self.input_ids}

# Load model components
# Everything below hard-codes the "cuda" device and fp16 weights, so stop early with
# an actionable message rather than the low-level driver error from pipe.to("cuda").
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. In Colab, select Runtime > Change runtime type > "
        "GPU, then re-run this cell."
    )

print("Loading model...")
pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    safety_checker=None
)
pipe = pipe.to("cuda")

# Reuse the pipeline's sub-models for training; the noise scheduler is loaded
# separately from the checkpoint's "scheduler" subfolder.
tokenizer = pipe.tokenizer
text_encoder = pipe.text_encoder
vae = pipe.vae
unet = pipe.unet
noise_scheduler = DDPMScheduler.from_pretrained(MODEL_NAME, subfolder="scheduler")

# Freeze VAE and text encoder
vae.requires_grad_(False)
text_encoder.requires_grad_(False)

# Apply LoRA to UNet
# Adapters are attached to the modules named to_q/to_k/to_v/to_out.0.
lora_config = LoraConfig(
    r=LORA_RANK,
    lora_alpha=LORA_RANK,
    init_lora_weights="gaussian",
    target_modules=["to_k", "to_q", "to_v", "to_out.0"]
)

unet = get_peft_model(unet, lora_config)
unet.print_trainable_parameters()

# Prepare dataset and dataloader
dataset = DreamBoothDataset(WORK_DIR, INSTANCE_PROMPT, tokenizer, RESOLUTION)
if len(dataset) == 0:
    # With no batches the `while` loop below could never advance global_step.
    raise ValueError(f"No training images found in {WORK_DIR}; check DATASET_PATH.")
dataloader = DataLoader(dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# Optimizer
# Only parameters that still require gradients are optimized; the frozen base
# weights are left out of the optimizer entirely.
trainable_params = [p for p in unet.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=LEARNING_RATE)

# Read model-specific constants from the loaded components instead of hard-coding
# them, so they stay correct if MODEL_NAME is changed.
latent_scale = vae.config.scaling_factor
num_train_timesteps = noise_scheduler.config.num_train_timesteps

# Training loop
print(f"\nüöÄ Starting LoRA training for {MAX_TRAIN_STEPS} steps...")
unet.train()
global_step = 0
losses = []

# Outer loop restarts the dataloader (a new epoch) until enough steps have run.
while global_step < MAX_TRAIN_STEPS:
    for batch in dataloader:
        if global_step >= MAX_TRAIN_STEPS:
            break

        pixel_values = batch["pixel_values"].to("cuda", dtype=torch.float16)
        input_ids = batch["input_ids"].to("cuda")

        # The VAE and text encoder are frozen, so no autograd graph is needed.
        with torch.no_grad():
            # Encode images to latent space
            latents = vae.encode(pixel_values).latent_dist.sample() * latent_scale

            # Get text embeddings
            encoder_hidden_states = text_encoder(input_ids)[0]

        # Sample noise and one random diffusion timestep per sample
        noise = torch.randn_like(latents)
        timesteps = torch.randint(
            0, num_train_timesteps, (latents.shape[0],), device="cuda"
        ).long()
        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        # Predict noise
        noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

        # Calculate loss in float32 against the noise that was actually added
        loss = F.mse_loss(noise_pred.float(), noise.float())

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        global_step += 1

        if global_step % 50 == 0:
            recent = losses[-50:]
            avg_loss = sum(recent) / len(recent)
            print(f"Step {global_step}/{MAX_TRAIN_STEPS} | Loss: {avg_loss:.4f}")

print("‚úÖ Training complete!")

# ===== PART 5: SAVE LORA WEIGHTS =====

# Persist the trained adapter to OUTPUT_DIR (created if missing).
# `unet` is the object returned by get_peft_model, so this is that wrapper's
# save_pretrained.
# NOTE(review): presumably this writes PEFT adapter files rather than the diffusers
# LoRA checkpoint layout — confirm the loader used later can read this directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)
unet.save_pretrained(OUTPUT_DIR)
print(f"‚úÖ LoRA weights saved to: {OUTPUT_DIR}")

# ===== PART 6: GENERATE IMAGES WITH YOUR LORA =====

from diffusers import StableDiffusionPipeline
from peft import PeftModel
import matplotlib.pyplot as plt

# Load base model and apply your LoRA
pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    safety_checker=None
).to("cuda")

# Load LoRA weights
# OUTPUT_DIR was written by the PEFT wrapper's save_pretrained, so it is read back
# with PEFT as well; pipe.load_lora_weights expects a diffusers-format checkpoint
# with differently prefixed keys. merge_and_unload folds the adapter into the base
# weights and hands the pipeline a plain UNet again.
pipe.unet = PeftModel.from_pretrained(pipe.unet, OUTPUT_DIR).merge_and_unload()

def generate_with_lora(prompt, num_images=1, steps=50, guidance=7.5):
    """Generate images with the module-level ``pipe`` and plot them in one row.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_images: Passed as ``num_images_per_prompt``.
        steps: Passed as ``num_inference_steps``.
        guidance: Passed as ``guidance_scale``.

    Returns:
        The ``.images`` list produced by the pipeline call.
    """
    images = pipe(
        prompt,
        num_inference_steps=steps,
        guidance_scale=guidance,
        num_images_per_prompt=num_images
    ).images

    # squeeze=False always returns a 2-D axes array, so a single image needs no
    # special case.
    fig, axes = plt.subplots(
        1, len(images), figsize=(5 * len(images), 5), squeeze=False
    )
    for ax, img in zip(axes[0], images):
        ax.imshow(img)
        ax.axis('off')
    # Titles longer than 60 characters are shortened with an ellipsis.
    plt.suptitle(prompt[:60] + "..." if len(prompt) > 60 else prompt)
    plt.show()
    # Release the figure explicitly; this function is also called repeatedly from
    # the web UI handler, where open figures would otherwise pile up.
    plt.close(fig)

    return images

# Test your LoRA!
# Use the same token you used in INSTANCE_PROMPT
images = generate_with_lora(f"{INSTANCE_PROMPT} in a beautiful garden", num_images=2)

# ===== PART 7: ANIMATE TO VIDEO =====

import cv2
import numpy as np
from moviepy.editor import ImageSequenceClip
from IPython.display import HTML
from base64 import b64encode

def _scaled_center_crop(img_array, scale):
    """Resize *img_array* by *scale* and return a centred frame of the original size."""
    h, w = img_array.shape[:2]
    new_h, new_w = int(h * scale), int(w * scale)
    resized = cv2.resize(img_array, (new_w, new_h))
    start_y, start_x = (new_h - h) // 2, (new_w - w) // 2
    # For scale < 1 the start offsets are negative; clamp them and stretch the
    # (smaller) result back to the original size so every frame has equal shape.
    cropped = resized[max(0, start_y):start_y + h, max(0, start_x):start_x + w]
    if cropped.shape[:2] != (h, w):
        cropped = cv2.resize(cropped, (w, h))
    return cropped


def animate_image_to_video(image, output_path="animated_output.mp4", duration=5, effect="zoom"):
    """Render a still image as a short 30 fps video with a simple motion effect.

    Args:
        image: Anything ``np.array`` converts to an (H, W[, C]) pixel array.
        output_path: Where the video file is written.
        duration: Clip length in seconds.
        effect: One of "zoom", "zoom_out", "pan_left", "pan_right", "pulse",
            "rotate".

    Returns:
        ``output_path``.

    Raises:
        ValueError: If *effect* is not a known effect or the duration yields no
            frames.
    """
    valid_effects = ("zoom", "zoom_out", "pan_left", "pan_right", "pulse", "rotate")
    if effect not in valid_effects:
        # Previously an unknown effect produced an empty frame list and an obscure
        # error further down.
        raise ValueError(
            f"Unknown effect {effect!r}; expected one of: {', '.join(valid_effects)}"
        )

    fps = 30
    total_frames = int(duration * fps)  # int() so fractional durations work with range()
    if total_frames <= 0:
        raise ValueError(f"duration must produce at least one frame, got {duration!r}")

    img_array = np.array(image)
    frames = []
    h, w = img_array.shape[:2]

    if effect in ("zoom", "zoom_out", "pulse"):
        for i in range(total_frames):
            if effect == "zoom":
                scale = 1 + (i / total_frames) * 0.3        # 1.0 -> ~1.3
            elif effect == "zoom_out":
                scale = 1.3 - (i / total_frames) * 0.3      # 1.3 -> ~1.0
            else:
                # Oscillates +/-5% with a period of 1.5 seconds.
                scale = 1 + 0.05 * np.sin(2 * np.pi * i / (fps * 1.5))
            frames.append(_scaled_center_crop(img_array, scale))
    elif effect in ("pan_left", "pan_right"):
        pad = w // 4
        padded = cv2.copyMakeBorder(img_array, 0, 0, pad, pad, cv2.BORDER_REFLECT)
        # Largest offset that still leaves a full-width window inside the padded
        # image. Using w // 2 here overshoots when w is not a multiple of 4.
        travel = 2 * pad
        for i in range(total_frames):
            if effect == "pan_left":
                progress = i / total_frames
            else:
                progress = (total_frames - i) / total_frames
            offset = int(progress * travel)
            frames.append(padded[:, offset:offset + w])
    else:  # rotate
        for i in range(total_frames):
            angle = (i / total_frames) * 10 - 5  # sweeps from -5 to about +5 degrees
            # The 1.1 zoom is applied together with the rotation.
            matrix = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.1)
            frames.append(cv2.warpAffine(img_array, matrix, (w, h), borderMode=cv2.BORDER_REFLECT))

    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(output_path, codec='libx264', audio=False, verbose=False, logger=None)
    print(f"‚úÖ Video saved: {output_path}")
    return output_path

def show_video(path):
    """Return an HTML ``<video>`` element with the file at *path* embedded as base64.

    Args:
        path: Path of the video file to embed.

    Returns:
        An ``HTML`` object wrapping the markup.
    """
    # Read through a context manager so the file handle is closed right away.
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    return HTML(f'<video width=512 controls><source src="data:video/mp4;base64,{b64encode(mp4).decode()}" type="video/mp4"></video>')

# Animate your generated image
from IPython.display import display

if images:
    video_path = animate_image_to_video(images[0], effect="zoom", duration=5)
    # A bare expression inside an `if` block is not auto-rendered by the notebook,
    # so the player has to be displayed explicitly.
    display(show_video(video_path))

# ===== QUICK REFERENCE =====
#
# 1. Put 10-20 images of your subject in a Google Drive folder
# 2. Update DATASET_PATH to point to that folder
# 3. Change INSTANCE_PROMPT (use unique token like "sks" + description)
# 4. Run all cells and wait for training (~15-30 mins)
# 5. Generate with: generate_with_lora("a photo of sks person as an astronaut")
# 6. Animate with: animate_image_to_video(images[0], effect="zoom")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processed: img_0005.jpg
Processed: img_0012.jpg
Processed: img_0008.jpg
Processed: img_0004.jpg
Processed: img_0014.jpg
Processed: img_0006.jpg
Processed: img_0001.jpg
Processed: img_0011.jpg
Processed: img_0013.jpg
Processed: img_0009.jpg
Processed: img_0000.jpg
Processed: img_0015.jpg
Processed: img_0010.jpg
Processed: img_0002.jpg
Processed: img_0007.jpg
Processed: img_0003.jpg

‚úÖ Prepared 16 images for training
Loading model...


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
import gradio as gr
# Note: pytubefix import removed to avoid bot detection issues
# We extract video IDs using regex instead
import re
import os
import subprocess
import tempfile
import whisper
from youtube_transcript_api import YouTubeTranscriptApi
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests
from google.colab import userdata

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_info = f"‚úÖ Using GPU: {torch.cuda.get_device_name(0)}" if device.type == "cuda" else "‚ö†Ô∏è GPU not found! Running on CPU (slow)."
print(device_info)

# LED model for long documents
# NOTE: MODEL_NAME and tokenizer rebind names that the LoRA training cell also
# assigns; from here on they refer to the LED summarizer.
print("Loading LED model...")
MODEL_NAME = "allenai/led-base-16384"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
# Half precision only on the GPU: the CPU fallback announced above keeps the
# default precision, because fp16 inference on CPU is not reliably supported.
if device.type == "cuda":
    model = model.half()
model = model.to(device).eval()
MAX_INPUT_LEN = 16384  # Token budget used for chunking and truncation below
print("LED model loaded!")

TRANSCRIPT_FILE = "/content/transcripts.txt"

# OpenRouter API configuration
# The key is read from Colab secrets; when it is unavailable, topic extraction is
# disabled rather than failing the whole cell.
try:
    OPENROUTER_API_KEY = userdata.get('OPENROUTER_API_KEY')
except Exception:
    # Not a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
    OPENROUTER_API_KEY = None
    print("‚ö†Ô∏è OpenRouter API key not found. Topic extraction will be disabled.")
    print("To enable: Click the üîë icon in left sidebar ‚Üí Add secret named 'OPENROUTER_API_KEY'")

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Whisper model cache
whisper_model = None  # Most recently returned model (kept for backward compatibility)
_whisper_models = {}  # model_name -> loaded Whisper model


def get_whisper_model(model_name="base"):
    """Return the Whisper model called *model_name*, loading it on first use.

    Models are cached per name, so asking for a different size after the first
    call loads that size instead of silently returning the first model.

    Args:
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        The loaded model; the global ``whisper_model`` is updated to the same object.
    """
    global whisper_model
    cached = _whisper_models.get(model_name)
    if cached is None:
        print(f"Loading Whisper {model_name} model...")
        cached = whisper.load_model(model_name)
        _whisper_models[model_name] = cached
        print("Whisper model loaded!")
    whisper_model = cached
    return cached

# --- YouTube Utilities ---
def get_youtube_id(url: str) -> str:
    """Extract video ID from YouTube URL without using pytubefix (avoids bot detection).

    Recognized forms: ``youtube.com/watch?...v=ID`` (``v`` may follow other query
    parameters), ``youtu.be/ID``, and ``/embed/``, ``/v/``, ``/shorts/``,
    ``/live/`` paths on youtube.com or youtube-nocookie.com.

    Args:
        url: The URL to parse.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If no video ID can be found in *url*.
    """
    patterns = [
        # watch URLs: skip any query parameters that come before `v=`
        r'youtube\.com/watch\?(?:[^&#\s]*&)*?v=([a-zA-Z0-9_-]{11})',
        # short links and path-style URLs
        r'(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/)([a-zA-Z0-9_-]{11})',
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError(f"Could not extract video ID from URL: {url}")

def yt_transcribe(video_id):
    """Fetch the transcript for *video_id* and return it as one string.

    The text of every fetched entry is joined with single spaces. Any failure is
    printed and reported to the caller as ``None`` rather than raised.
    """
    try:
        segments = YouTubeTranscriptApi().fetch(video_id)
        transcript = " ".join(segment.text for segment in segments)
    except Exception as err:
        print(f"Transcript not available: {err}")
        return None
    return transcript

# --- Video / Audio Utilities ---
def repair_video(input_path):
    """Remux *input_path* with ffmpeg (stream copy) into a sibling ``*_fixed`` file.

    The output keeps the input's extension: ``clip.mp4`` -> ``clip_fixed.mp4``,
    ``clip.mkv`` -> ``clip_fixed.mkv``. Only the final extension is touched, so
    the output path always differs from the input path.

    Args:
        input_path: Path of the video to remux.

    Returns:
        Path of the remuxed file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    root, ext = os.path.splitext(input_path)
    fixed_path = f"{root}_fixed{ext}"

    command = ["ffmpeg", "-y", "-i", input_path, "-c", "copy"]
    # faststart is an option of the MOV/MP4 muxer, so it is only passed for those
    # containers.
    if ext.lower() in (".mp4", ".mov", ".m4v"):
        command += ["-movflags", "faststart"]
    command.append(fixed_path)

    subprocess.run(command, check=True, capture_output=True)
    return fixed_path

def extract_audio_ffmpeg(video_path):
    """Extract the audio track of *video_path* to a temporary WAV file.

    ffmpeg is asked for 16 kHz, mono, 16-bit PCM output.

    Args:
        video_path: Path of the source video.

    Returns:
        Path of the WAV file. The caller is responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Only the unique name is needed; close our own handle before ffmpeg
    # overwrites the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        audio_path = tmp.name
    try:
        subprocess.run([
            "ffmpeg", "-y", "-i", video_path,
            "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
            audio_path
        ], check=True, capture_output=True)
    except Exception:
        # Do not leave an orphaned temp file behind when extraction fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path

def transcribe_video(video_path, model_name="base"):
    """Transcribe the speech in *video_path* with Whisper and return the text.

    The audio is first extracted to a temporary WAV file, which is removed again
    whether or not transcription succeeds.
    """
    whisper_net = get_whisper_model(model_name)
    wav_path = extract_audio_ffmpeg(video_path)
    try:
        return whisper_net.transcribe(wav_path)["text"]
    finally:
        # The extracted audio is single-use.
        if os.path.exists(wav_path):
            os.remove(wav_path)

# --- Simplified Video Generation (prompt only) ---
def generate_video_from_prompt(prompt):
    """Turn a text prompt into a short animated video using fixed settings.

    One image is generated (50 steps, guidance 7.5) and animated with a 5-second
    "zoom" effect into /content/animated_video.mp4.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is ``None`` when the
        prompt is blank or anything fails.
    """
    # Guard clause: nothing to do for a missing or whitespace-only prompt.
    if not (prompt and prompt.strip()):
        return None, "‚ùå Please enter a prompt."

    try:
        generated = generate_with_lora(prompt, num_images=1, steps=50, guidance=7.5)
        first_image = generated[0]
        clip_path = animate_image_to_video(
            first_image,
            output_path="/content/animated_video.mp4",
            effect="zoom",
            duration=5,
        )
    except Exception as err:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"‚ùå Error generating video: {err}"

    return clip_path, "‚úÖ Video generated successfully!"

# --- File Utilities ---
def save_transcript_to_file(text, filename=TRANSCRIPT_FILE):
    """Append *text* to *filename*, followed by an 80-character ``=`` divider."""
    divider = "=" * 80
    entry = text + "\n\n" + divider + "\n\n"
    with open(filename, mode="a", encoding="utf-8") as out:
        out.write(entry)
# --- Summarization Utilities ---
def chunk_text(text, max_tokens=MAX_INPUT_LEN):
    """Split *text* on whitespace into chunks of at most *max_tokens* tokens.

    Token counts are the sum of per-word counts from the module-level
    ``tokenizer`` (special tokens excluded). A chunk is closed *before* a word
    would push it over the budget; a single word that alone exceeds the budget
    becomes its own chunk.
    NOTE(review): per-word counts may differ slightly from tokenizing the joined
    chunk — treat the budget as approximate.

    Args:
        text: The text to split.
        max_tokens: Token budget per chunk.

    Returns:
        A list of chunk strings; empty when *text* contains no words.
    """
    chunks = []
    cur_chunk = []
    cur_len = 0
    for word in text.split():
        word_len = len(tokenizer.encode(word, add_special_tokens=False))
        # Flush first, so a chunk never exceeds the budget by its last word.
        if cur_chunk and cur_len + word_len > max_tokens:
            chunks.append(" ".join(cur_chunk))
            cur_chunk = []
            cur_len = 0
        cur_chunk.append(word)
        cur_len += word_len
    if cur_chunk:
        chunks.append(" ".join(cur_chunk))
    return chunks

def summarize_chunk(text, min_len=250, max_len=450):
    """Summarize *text* with the module-level LED ``model`` and ``tokenizer``.

    The input is truncated to ``MAX_INPUT_LEN`` tokens and decoded with 4-beam
    search.

    Args:
        text: Text to summarize.
        min_len: Passed to ``generate`` as ``min_length``.
        max_len: Passed to ``generate`` as ``max_length``.

    Returns:
        The decoded summary string.
    """
    enc = tokenizer(text, padding=True, truncation=True, max_length=MAX_INPUT_LEN, return_tensors="pt")
    enc = {k: v.to(device) for k, v in enc.items()}

    # LED uses local attention unless told otherwise; its documentation recommends
    # global attention on the first token for summarization.
    global_attention_mask = torch.zeros_like(enc["input_ids"])
    global_attention_mask[:, 0] = 1

    with torch.no_grad():
        gen_ids = model.generate(
            **enc,
            global_attention_mask=global_attention_mask,
            max_length=max_len,
            min_length=min_len,
            num_beams=4,
            no_repeat_ngram_size=3,
            length_penalty=2.0,
            early_stopping=True
        )
    return tokenizer.batch_decode(gen_ids, skip_special_tokens=True)[0]

def summarize_large_text(text, min_len=250, max_len=450):
    """Summarize text of any length by summarizing chunks, then their summaries.

    Args:
        text: The full text to summarize.
        min_len: Minimum summary length forwarded to ``summarize_chunk``.
        max_len: Maximum summary length forwarded to ``summarize_chunk``.

    Returns:
        The final summary; ``""`` when *text* yields no chunks.
    """
    chunks = chunk_text(text)
    if not chunks:
        return ""

    chunk_summaries = [summarize_chunk(c, min_len, max_len) for c in chunks]
    # With a single chunk there is nothing to combine; a second pass would only
    # re-summarize an existing summary.
    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    combined_summary = " ".join(chunk_summaries)
    return summarize_chunk(combined_summary, min_len, max_len)

# --- Topic Extraction with OpenRouter Mistral ---
def extract_main_topics(transcript_text):
    """Ask the OpenRouter chat API for the three main topics of a transcript.

    Only the first 15000 characters of *transcript_text* are sent. Failures are
    returned as a message string rather than raised, because the result is shown
    directly in the UI.

    Args:
        transcript_text: The transcript to analyze.

    Returns:
        The model's reply text, or a warning/error message string.
    """
    if not OPENROUTER_API_KEY:
        return "‚ö†Ô∏è OpenRouter API key not found. Please add it to Colab Secrets (üîë icon in sidebar)."

    try:
        max_chars = 15000
        # Slicing is already a no-op for shorter text.
        truncated_text = transcript_text[:max_chars]

        prompt = f"""Analyze the following educational content transcript and identify exactly 3 main topics discussed.
For each topic, provide:
1. A clear, concise topic title (5-10 words)
2. A brief description (1-2 sentences)

Format your response as:
**Topic 1: [Title]**
[Description]

**Topic 2: [Title]**
[Description]

**Topic 3: [Title]**
[Description]

Transcript:
{truncated_text}
"""

        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json"
        }

        data = {
            "model": "mistralai/mistral-7b-instruct:free",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 800,
            "temperature": 0.7
        }

        # Without a timeout a stalled connection would block the UI handler forever.
        response = requests.post(OPENROUTER_URL, headers=headers, json=data, timeout=60)
        response.raise_for_status()

        result = response.json()
        topics = result['choices'][0]['message']['content']
        return topics

    except Exception as e:
        return f"‚ùå Error extracting topics: {e}"

# --- Main Processing Function ---
def process_videos(youtube_links, uploaded_files, progress=gr.Progress()):
    """Transcribe YouTube links and uploaded videos, then extract topics and summarize.

    Every transcript is also appended to ``TRANSCRIPT_FILE``, which is emptied at
    the start of each run.

    Args:
        youtube_links: Newline-separated YouTube URLs (may be empty or ``None``).
        uploaded_files: Iterable of uploaded files; each item is either a path
            or an object with a ``name`` attribute holding the path.
        progress: Gradio progress tracker.

    Returns:
        ``(combined_transcript, topics, summary, status_message)``. When no
        transcript could be produced, the first three items are empty strings.
    """
    transcripts = []
    errors = []

    # Start every run with an empty transcript log.
    with open(TRANSCRIPT_FILE, "w"):
        pass

    if youtube_links and youtube_links.strip():
        links = [link.strip() for link in youtube_links.strip().splitlines() if link.strip()]
        for i, link in enumerate(links):
            progress((i + 1) / (len(links) + 1), desc=f"Processing YouTube link {i + 1}/{len(links)}")
            try:
                video_id = get_youtube_id(link)
                transcript = yt_transcribe(video_id)
                if transcript:
                    save_transcript_to_file(f"Transcript for {link}:\n{transcript}")
                    transcripts.append(transcript)
                else:
                    errors.append(f"‚ö†Ô∏è Could not get transcript for {link}")
            except Exception as e:
                errors.append(f"‚ùå Error processing {link}: {e}")

    if uploaded_files:
        for i, uploaded_file in enumerate(uploaded_files):
            progress((i + 1) / (len(uploaded_files) + 1), desc=f"Transcribing video {i + 1}/{len(uploaded_files)}")
            file_path = None
            fixed_video = None
            try:
                # Uploads arrive either as plain paths or as objects exposing `.name`.
                file_path = getattr(uploaded_file, 'name', uploaded_file)

                fixed_video = repair_video(file_path)
                transcript = transcribe_video(fixed_video, model_name="base")
                filename = os.path.basename(file_path)
                if transcript:
                    save_transcript_to_file(f"Transcript for {filename}:\n{transcript}")
                    transcripts.append(transcript)
                else:
                    # Mirror the YouTube branch: an empty result is reported, not dropped.
                    errors.append(f"‚ö†Ô∏è Could not get transcript for {filename}")
            except Exception as e:
                errors.append(f"‚ùå Error processing uploaded file: {e}")
            finally:
                # The repaired copy is only needed for transcription; remove it so
                # repeated runs do not fill the disk. Never delete the original.
                if fixed_video and fixed_video != file_path and os.path.exists(fixed_video):
                    try:
                        os.remove(fixed_video)
                    except OSError:
                        pass

    if not transcripts:
        error_msg = "\n".join(errors) if errors else "No transcripts were generated."
        return "", "", "", error_msg

    combined_text = "\n\n".join(transcripts)

    progress(0.7, desc="Extracting main topics...")
    topics = extract_main_topics(combined_text)

    progress(0.9, desc="Generating summary...")
    summarized_text = summarize_large_text(combined_text, min_len=250, max_len=450)

    error_msg = "\n".join(errors) if errors else "‚úÖ Processing completed successfully!"

    return combined_text, topics, summarized_text, error_msg

# --- Gradio Interface ---
# Build the Gradio UI. Layout: a left column with the inputs (YouTube links, file
# upload, process button) and a wider right column with result tabs plus a status
# box. Components are placed in the order they are created inside each context.
with gr.Blocks(
    title="Educational Content Summarizer",
    theme=gr.themes.Soft(),
    css=".gradio-container {max-width: 1200px !important}"
) as demo:
    gr.Markdown("# üìö Educational Content Creation through Multi-Video Summarization")
    # device_info was computed once at cell start (GPU name or CPU warning).
    gr.Markdown(f"**Device Status:** {device_info}")

    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=1):
            youtube_input = gr.Textbox(
                label="üîó YouTube Video Links",
                placeholder="Enter YouTube links (one per line)...\n\nExample:\nhttps://www.youtube.com/watch?v=VIDEO_ID",
                lines=5
            )
            video_upload = gr.File(
                label="üìÅ Upload Video Files",
                file_types=[".mp4", ".avi", ".mov", ".mkv"],
                file_count="multiple"
            )
            process_btn = gr.Button("üé¨ Get Detailed Notes", variant="primary", size="lg")

        # Right column: results
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("üìù Transcript"):
                    transcript_output = gr.Textbox(
                        label="Combined Transcript",
                        lines=15,
                        show_copy_button=True
                    )
                with gr.TabItem("üéØ Main Topics"):
                    topics_output = gr.Markdown(label="Main Topics Identified")
                with gr.TabItem("üìã Summary"):
                    summary_output = gr.Textbox(
                        label="Final Summary (~20 sentences)",
                        lines=10,
                        show_copy_button=True
                    )
                with gr.TabItem("üé® Video Generator"):
                    gr.Markdown("## üé• Generate Animated Video from Prompt")
                    gr.Markdown("*Enter a prompt and get an animated video automatically.*")

                    prompt_input = gr.Textbox(
                        label="üñäÔ∏è Prompt",
                        placeholder="Describe the scene to generate...",
                        lines=3
                    )
                    generate_video_btn = gr.Button("üé¨ Generate Video", variant="primary", size="lg")

                    video_output = gr.Video(label="Generated Video")
                    video_status = gr.Textbox(label="Status", interactive=False)

                    # generate_video_from_prompt returns (video_path, status), in
                    # the same order as the outputs listed here.
                    generate_video_btn.click(
                        fn=generate_video_from_prompt,
                        inputs=[prompt_input],
                        outputs=[video_output, video_status]
                    )

            # Status box for the transcript/summary pipeline (below the tabs).
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                interactive=False
            )

    # process_videos returns (transcript, topics, summary, status), in the same
    # order as the outputs listed here.
    process_btn.click(
        fn=process_videos,
        inputs=[youtube_input, video_upload],
        outputs=[transcript_output, topics_output, summary_output, status_output],
        show_progress="full"
    )

    gr.Markdown("""
    ---
    ### üìñ Instructions:
    1. **YouTube Videos**: Paste YouTube video links (one per line)
    2. **Local Videos**: Upload video files (.mp4, .avi, .mov, .mkv)
    3. Click **"Get Detailed Notes"** to process
    4. View results in the tabs: Transcript, Topics, and Summary
    5. **Video Generator**: Enter a prompt and click "Generate Video" to create an animated video

    """)

In [None]:
# Start serving the `demo` Blocks app.
# NOTE(review): share=True makes the app reachable through a public link; the
# handlers have no authentication, so confirm that exposure is intended.
demo.launch(
    share=True,  # Creates a public URL you can share
    debug=True,  # Shows detailed errors
    show_error=True
)