Text-to-Image Generation with Stable Diffusion
==============================================

[View on Google Colab](https://colab.research.google.com/drive/1DA2GPoR8rGgYOCgdTUDpX1eMxWKcWFVV?usp=sharing)

### Import the necessary libraries

In [1]:
#!pip install torch matplotlib pillow diffusers transformers accelerate

import torch
import matplotlib.pyplot as plt
from PIL import Image
from diffusers import StableDiffusionPipeline

---

### Device Setup

In [2]:
def setup_device():
    """Pick the preferred torch device and the dtype to load weights in.

    Returns:
        A ``(device, dtype)`` tuple: ``("cuda", torch.float16)`` when CUDA is
        available, ``("mps", torch.float32)`` when the MPS backend is
        available, and ``("cpu", torch.float32)`` otherwise.
    """
    if torch.cuda.is_available():
        return "cuda", torch.float16

    # Look the MPS backend up defensively: PyTorch builds that do not ship
    # ``torch.backends.mps`` should fall through to CPU rather than raise
    # AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps", torch.float32

    return "cpu", torch.float32

---

### Text2Image Pipeline from HuggingFace

In [3]:
def load_text2img_pipeline(device="cuda", dtype=torch.float16):
    """Build the Stable Diffusion text-to-image pipeline and move it to ``device``.

    Args:
        device: Device string handed to the pipeline's ``.to()``.
        dtype: Value passed as ``torch_dtype`` when loading the weights.

    Returns:
        The loaded pipeline, with attention slicing switched on when the
        pipeline object exposes ``enable_attention_slicing``.
    """
    load_options = {
        "torch_dtype": dtype,
        # Load the pipeline without a safety checker component.
        "safety_checker": None,
        "requires_safety_checker": False,
    }
    pipeline = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", **load_options
    ).to(device)

    if hasattr(pipeline, "enable_attention_slicing"):
        pipeline.enable_attention_slicing()
    return pipeline


---

### Run Inference

In [None]:
def generate_image(
    pipe,
    prompt,
    seed=42,
    *,
    negative_prompt="blurry, low quality",
    num_inference_steps=20,
    guidance_scale=7.5,
):
    """Generate a single image from a text prompt.

    Args:
        pipe: Callable pipeline; it is invoked with keyword arguments and must
            return an object exposing an ``images`` sequence.
        prompt: Text description of the desired image.
        seed: Seed applied to torch's global random number generator before
            the pipeline runs, so repeated calls are reproducible.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
        num_inference_steps: Forwarded to the pipeline as
            ``num_inference_steps``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    # torch.manual_seed seeds the generators on all devices, so a separate
    # torch.cuda.manual_seed call is not needed.
    torch.manual_seed(seed)

    with torch.no_grad():
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        )
    return result.images[0]

In [None]:
def visualize_results(results):
    """Show generated images side by side, each titled with its prompt.

    Args:
        results: Sequence of ``(prompt, image)`` pairs. Each image is handed
            to ``Axes.imshow``.

    Returns:
        None. When ``results`` is empty no figure is created.
    """
    # plt.subplots cannot build a grid with zero columns, so bail out early.
    if not results:
        return

    max_title_chars = 40

    # squeeze=False always yields a 2-D array of axes, so a single image
    # needs no special-casing; row 0 holds one axis per result.
    _, axes = plt.subplots(1, len(results), figsize=(15, 5), squeeze=False)

    for ax, (prompt, image) in zip(axes[0], results):
        ax.imshow(image)
        # Only add an ellipsis when the prompt was actually truncated.
        if len(prompt) > max_title_chars:
            title = prompt[:max_title_chars] + "..."
        else:
            title = prompt
        ax.set_title(title, fontsize=10)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Run the whole flow: pick a device, load the model, generate, display.
device, dtype = setup_device()
pipe = load_text2img_pipeline(device, dtype)

prompts = [
    "A majestic lion in a savanna at sunset",
    "A futuristic city with flying cars",
    "A serene mountain lake with reflection",
]

# Each prompt gets its own seed, counting up from 42.
results = []
for seed, prompt in enumerate(prompts, start=42):
    results.append((prompt, generate_image(pipe, prompt, seed=seed)))

visualize_results(results)

---