# Image & Video Generation with Gradio

This notebook provides a complete interface for generating images and videos using AI models.

**Features:**
- Text-to-Image generation using Stable Diffusion
- Text-to-Video generation
- Interactive Gradio interface

**Note:** This notebook is optimized for Google Colab with GPU support.

## 1. Install Dependencies

In [None]:
# Install required packages
!pip install -q diffusers transformers accelerate torch torchvision gradio xformers safetensors
!pip install -q opencv-python imageio imageio-ffmpeg
print("‚úÖ All packages installed successfully!")

## 2. Import Libraries

In [None]:
import torch
import gradio as gr
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from diffusers import DiffusionPipeline
import numpy as np
from PIL import Image
import imageio
import os
from pathlib import Path

# Report the runtime environment: library version, CUDA visibility and,
# when a GPU is present, the name of device 0.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 3. Setup Image Generation Model

In [None]:
# Load Stable Diffusion model for image generation
print("Loading Stable Diffusion model...")

# The original "runwayml/stable-diffusion-v1-5" repository has been deprecated on the
# Hugging Face Hub; this repository is the mirror of the same v1.5 checkpoint.
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the pipeline. Half precision is only used on GPU; the CPU path
# stays in float32.
image_pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # NOTE(review): the safety checker is disabled here while the app is launched
    # with a public share link further down — confirm this is intended.
    safety_checker=None,
)

# Use DPM-Solver for faster generation
image_pipe.scheduler = DPMSolverMultistepScheduler.from_config(image_pipe.scheduler.config)
image_pipe = image_pipe.to(device)

# Enable memory optimizations (GPU only)
if device == "cuda":
    image_pipe.enable_attention_slicing()
    try:
        image_pipe.enable_xformers_memory_efficient_attention()
        print("‚úÖ XFormers enabled for better performance")
    except Exception:
        # xformers is an optional accelerator: fall back to standard attention.
        # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still propagate.
        print("‚ö†Ô∏è XFormers not available, using standard attention")

print("‚úÖ Image generation model loaded successfully!")

## 4. Setup Video Generation Model

In [None]:
# Load text-to-video model
print("Loading video generation model...")

try:
    # Using ModelScope text-to-video model
    !pip install -q modelscope
    from modelscope.pipelines import pipeline
    from modelscope.outputs import OutputKeys
    
    video_pipe = pipeline('text-to-video-synthesis', 'damo/text-to-video-ms-1.7b')
    video_model_loaded = True
    print("‚úÖ Video generation model loaded successfully!")
except Exception as e:
    print(f"‚ö†Ô∏è Could not load video model: {e}")
    print("Video generation will use image-to-video alternative")
    video_model_loaded = False

## 5. Define Generation Functions

In [None]:
def generate_image(prompt, negative_prompt="", num_steps=25, guidance_scale=7.5, seed=-1):
    """
    Generate an image from a text prompt.

    Args:
        prompt: Text description of the desired image
        negative_prompt: What to avoid in the image
        num_steps: Number of denoising steps (higher = better quality, slower).
            Coerced to int before being passed to the pipeline.
        guidance_scale: How closely to follow the prompt (7-12 recommended)
        seed: Random seed for reproducibility. -1 or None means "random".
            Whole-number floats (e.g. 42.0, as delivered by a numeric UI
            field) are accepted and converted to int.

    Returns:
        The first image of the pipeline result, or None if generation failed
        (the error is printed).
    """
    from contextlib import nullcontext

    try:
        # torch.Generator.manual_seed() only accepts ints, so normalise the
        # seed first; None (cleared input field) is treated like -1.
        generator = None
        if seed is not None and int(seed) != -1:
            generator = torch.Generator(device=device).manual_seed(int(seed))

        # Autocast only on GPU: on CPU it would downcast a pipeline that is
        # used in float32 to bfloat16.
        precision_ctx = torch.autocast(device) if device == "cuda" else nullcontext()

        # Generate image
        with precision_ctx:
            result = image_pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=int(num_steps),
                guidance_scale=guidance_scale,
                generator=generator,
            )

        return result.images[0]

    except Exception as e:
        print(f"Error generating image: {e}")
        return None


def generate_video(prompt, num_frames=16, fps=8):
    """
    Generate a video from a text prompt.

    When the ModelScope pipeline was loaded, it is called with the prompt only
    (num_frames and fps are not forwarded) and the path it reports is returned.
    Otherwise a fallback renders one independent image per frame with
    generate_image() and writes them to "generated_video.mp4".

    Args:
        prompt: Text description of the desired video
        num_frames: Number of frames to generate (fallback path only);
            coerced to int so whole-number floats are accepted
        fps: Frames per second for the output video (fallback path only)

    Returns:
        Path of the video file, or None if no frame could be generated or an
        error occurred (the error is printed).
    """
    try:
        if video_model_loaded:
            # Use ModelScope text-to-video
            output = video_pipe(prompt)
            return output[OutputKeys.OUTPUT_VIDEO]

        # Fallback: build a video from individually generated images
        print("Using image-based video generation...")

        frames = []
        for i in range(int(num_frames)):
            # A distinct fixed seed and prompt suffix per frame gives each
            # frame a slight variation.
            img = generate_image(
                prompt=f"{prompt}, frame {i}",
                num_steps=20,
                seed=i * 1000,
            )
            # generate_image() signals failure with None; skip those frames.
            if img is not None:
                frames.append(np.array(img))

        if not frames:
            return None

        # Save as video
        output_path = "generated_video.mp4"
        imageio.mimsave(output_path, frames, fps=fps)
        return output_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None


def create_interpolation_video(start_prompt, end_prompt, num_frames=30, fps=10):
    """
    Create a video that interpolates between two prompts.

    One image is generated per prompt with generate_image(); the frames are a
    pixel-wise linear blend from the first image to the second, written to
    "interpolation_video.mp4".

    Args:
        start_prompt: Prompt for the first frame
        end_prompt: Prompt for the last frame
        num_frames: Number of frames in the video; coerced to int. A value of
            1 yields a single frame showing the start image.
        fps: Frames per second for the output video

    Returns:
        Path of the video file, or None if num_frames < 1, either image could
        not be generated, or an error occurred (the error is printed).
    """
    try:
        frame_count = int(num_frames)
        if frame_count < 1:
            return None

        # Generate start and end images
        start_img = generate_image(start_prompt, num_steps=30)
        end_img = generate_image(end_prompt, num_steps=30)

        # generate_image() signals failure with None.
        if start_img is None or end_img is None:
            return None

        start_array = np.array(start_img)
        end_array = np.array(end_img)

        # Denominator of the blend weight; clamped to 1 so that a single-frame
        # request does not divide by zero.
        last_index = max(frame_count - 1, 1)

        # Linear interpolation: alpha runs from 0 (start image) to 1 (end image)
        frames = []
        for i in range(frame_count):
            alpha = i / last_index
            frame = ((1 - alpha) * start_array + alpha * end_array).astype(np.uint8)
            frames.append(frame)

        # Save video
        output_path = "interpolation_video.mp4"
        imageio.mimsave(output_path, frames, fps=fps)
        return output_path

    except Exception as e:
        print(f"Error creating interpolation: {e}")
        return None

# Status message emitted once the function definitions in this cell have executed.
print("‚úÖ Generation functions defined!")

## 6. Create Gradio Interface

In [None]:
# Create Gradio interface with tabs for different functionalities.
# Each tab wires a set of input widgets to one of the generation functions:
#   Image Generation     -> generate_image
#   Video Generation     -> generate_video
#   Prompt Interpolation -> create_interpolation_video

with gr.Blocks(title="AI Image & Video Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # üé® AI Image & Video Generator
    Generate stunning images and videos using state-of-the-art AI models!
    """)
    
    with gr.Tabs():
        # Tab 1: Image Generation
        with gr.Tab("üñºÔ∏è Image Generation"):
            with gr.Row():
                with gr.Column():
                    img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Describe the image you want to generate...",
                        lines=3
                    )
                    img_negative = gr.Textbox(
                        label="Negative Prompt (Optional)",
                        placeholder="What to avoid in the image...",
                        lines=2
                    )
                    
                    with gr.Row():
                        img_steps = gr.Slider(
                            minimum=10,
                            maximum=50,
                            value=25,
                            step=1,
                            label="Steps"
                        )
                        img_guidance = gr.Slider(
                            minimum=1,
                            maximum=20,
                            value=7.5,
                            step=0.5,
                            label="Guidance Scale"
                        )
                    
                    # precision=0 makes the component deliver an int; the seed is
                    # passed to torch.Generator.manual_seed(), which rejects floats.
                    img_seed = gr.Number(
                        label="Seed (-1 for random)",
                        value=-1,
                        precision=0
                    )
                    
                    img_button = gr.Button("Generate Image", variant="primary")
                
                with gr.Column():
                    img_output = gr.Image(label="Generated Image", type="pil")
            
            # Input order must match generate_image's positional parameters.
            img_button.click(
                fn=generate_image,
                inputs=[img_prompt, img_negative, img_steps, img_guidance, img_seed],
                outputs=img_output
            )
            
            # Examples only pre-fill the two text boxes; they do not trigger generation.
            gr.Examples(
                examples=[
                    ["A serene mountain landscape at sunset, photorealistic", "blurry, low quality"],
                    ["A futuristic cyberpunk city with neon lights, 4k", "ugly, distorted"],
                    ["A cute cat wearing a wizard hat, studio lighting", ""]
                ],
                inputs=[img_prompt, img_negative]
            )
        
        # Tab 2: Video Generation
        with gr.Tab("üé¨ Video Generation"):
            with gr.Row():
                with gr.Column():
                    vid_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Describe the video you want to generate...",
                        lines=3
                    )
                    
                    with gr.Row():
                        vid_frames = gr.Slider(
                            minimum=8,
                            maximum=32,
                            value=16,
                            step=1,
                            label="Number of Frames"
                        )
                        vid_fps = gr.Slider(
                            minimum=4,
                            maximum=24,
                            value=8,
                            step=1,
                            label="FPS"
                        )
                    
                    vid_button = gr.Button("Generate Video", variant="primary")
                
                with gr.Column():
                    vid_output = gr.Video(label="Generated Video")
            
            # Input order must match generate_video(prompt, num_frames, fps).
            vid_button.click(
                fn=generate_video,
                inputs=[vid_prompt, vid_frames, vid_fps],
                outputs=vid_output
            )
            
            gr.Examples(
                examples=[
                    ["An astronaut floating in space"],
                    ["Ocean waves crashing on a beach"]
                ],
                inputs=[vid_prompt]
            )
        
        # Tab 3: Interpolation Video
        with gr.Tab("üîÑ Prompt Interpolation"):
            with gr.Row():
                with gr.Column():
                    interp_start = gr.Textbox(
                        label="Start Prompt",
                        placeholder="Starting scene...",
                        lines=2
                    )
                    interp_end = gr.Textbox(
                        label="End Prompt",
                        placeholder="Ending scene...",
                        lines=2
                    )
                    
                    with gr.Row():
                        interp_frames = gr.Slider(
                            minimum=10,
                            maximum=60,
                            value=30,
                            step=1,
                            label="Number of Frames"
                        )
                        interp_fps = gr.Slider(
                            minimum=5,
                            maximum=30,
                            value=10,
                            step=1,
                            label="FPS"
                        )
                    
                    interp_button = gr.Button("Create Interpolation", variant="primary")
                
                with gr.Column():
                    interp_output = gr.Video(label="Interpolation Video")
            
            # Input order must match
            # create_interpolation_video(start_prompt, end_prompt, num_frames, fps).
            interp_button.click(
                fn=create_interpolation_video,
                inputs=[interp_start, interp_end, interp_frames, interp_fps],
                outputs=interp_output
            )
            
            gr.Examples(
                examples=[
                    ["A sunny day in the park", "A starry night sky"],
                    ["A red rose", "A blue orchid"]
                ],
                inputs=[interp_start, interp_end]
            )
    
    gr.Markdown("""
    ---
    ### Tips:
    - **Image Generation**: Use detailed descriptions for better results. Higher steps = better quality but slower.
    - **Video Generation**: Keep prompts simple and descriptive. Video generation takes longer.
    - **Interpolation**: Creates smooth transitions between two different scenes.
    """)

print("‚úÖ Gradio interface created!")

## 7. Launch the Interface

In [None]:
# Start the Gradio app defined above.
#   share=True      -> creates a public link
#   debug=True      -> debug mode
#   show_error=True -> show errors
demo.launch(share=True, debug=True, show_error=True)

## 8. Test Individual Functions (Optional)

In [None]:
# Optional: Test individual functions

# Test image generation
# test_img = generate_image("a beautiful sunset over mountains")
# display(test_img)

# Test video generation
# test_vid = generate_video("waves on a beach", num_frames=8)
# print(f"Video saved to: {test_vid}")