# Stable Diffusion Demo 

This notebook demonstrates the Stable Diffusion pipeline with visualization of latent representations at each denoising step.

In [1]:
import model_loader
import pipeline
from PIL import Image
from transformers import CLIPTokenizer
import torch

# Default to the CPU; an accelerator is used only when it is both allowed
# below and reported as usable by PyTorch.
DEVICE = "cpu"

# Opt-in switches for each accelerator backend.
ALLOW_CUDA = True
ALLOW_MPS = False

# The opt-in flag is tested first so a disabled backend is never probed.
if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
elif ALLOW_MPS and torch.backends.mps.is_available():
    # Only is_available() is consulted: the deprecated `torch.has_mps` merely
    # reports that the build includes MPS support, not that the device works.
    DEVICE = "mps"
print(f"Using device: {DEVICE}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


In [2]:
# Load models and tokenizer
# The tokenizer is built from the local vocabulary and merges files; the
# paths are relative to the notebook's working directory.
tokenizer = CLIPTokenizer("../data/vocab.json", merges_file="../data/merges.txt")
# Checkpoint handed to the project's model loader together with DEVICE.
model_file = "../data/v1-5-pruned-emaonly.ckpt"
# `models` is the object later passed as `models=` to pipeline.generate()
# and inpainting.inpaint().
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)
print("Models loaded successfully!")

Models loaded successfully!


In [3]:
# Configuration
# NOTE(review): none of the later cells visible here read these values -- the
# Gradio handlers take their inputs from the UI widgets and the inpainting
# test cell uses its own literals. Confirm whether another cell uses them.
prompt = "A dog wearing a red scarf, sitting in a dreamy flower field at golden hour, highly detailed, realistic style"
negative_prompt = "blurry, low quality, distorted, bad anatomy, extra limbs"  # What to avoid
do_cfg = True  # presumably toggles classifier-free guidance -- confirm in pipeline
cfg_scale = 8  # min: 1, max: 14

# Image to image (optional)
input_image = None
image_path = "../images/dog.jpg"
# Uncomment the two lines below to enable image to image.
# Image.resize() returns a new image, so its result must be assigned back.
# input_image = Image.open(image_path).convert("RGB")
# input_image = input_image.resize((512, 512))
strength = 0.8

# Sampler settings - Available options: "ddim", "ddpm", "euler"
sampler = "ddim"  # Default changed to DDIM for faster generation
num_inference_steps = 20  # Reduced for faster execution with DDIM/Euler (DDPM may need 50+)
seed = 42


In [4]:
import gradio as gr
import inpainting
import random
import numpy as np
from threading import Event

# Shared threading.Event passed as `cancel_flag=` to pipeline.generate() by the
# text-to-image and image-to-image handlers below.
# NOTE(review): nothing in the visible cells calls .set() or .clear() on it --
# confirm where cancellation is meant to be triggered.
cancel_flag = Event()

def _resolve_seed(seed):
    """Turn the raw value of a seed widget into an integer seed.

    ``None`` (an emptied seed field) and ``-1`` both request a random seed
    drawn from ``[0, 999999]``. Any other value is coerced with ``int()``,
    because a ``gr.Number`` without ``precision=0`` delivers floats such as
    ``42.0``.
    """
    if seed is None or seed == -1:
        return random.randint(0, 999999)
    return int(seed)


def _run_generation(prompt, negative_prompt, input_image, strength, cfg_scale,
                    num_inference_steps, seed, sampler_name):
    """Validate the prompt, call ``pipeline.generate`` and wrap its result.

    Shared implementation of the text-to-image and image-to-image handlers;
    the only difference between them is whether ``input_image`` is ``None``.

    Returns:
        A PIL image built from the array returned by ``pipeline.generate``,
        or ``None`` when the pipeline returns ``None`` (presumably after a
        cancellation through ``cancel_flag`` -- confirm in ``pipeline``).

    Raises:
        gr.Error: if the prompt is missing or contains only whitespace.
    """
    if not (prompt or "").strip():
        raise gr.Error("Prompt is required")
    output_image = pipeline.generate(
        prompt=prompt,
        uncond_prompt=negative_prompt,
        input_image=input_image,
        strength=strength,
        do_cfg=True,
        cfg_scale=cfg_scale,
        sampler_name=sampler_name,
        n_inference_steps=num_inference_steps,
        seed=_resolve_seed(seed),
        models=models,
        device=DEVICE,
        idle_device="cpu",
        tokenizer=tokenizer,
        cancel_flag=cancel_flag,
    )
    if output_image is None:
        return None
    return Image.fromarray(output_image)


def generate_txt2img(prompt, negative_prompt, strength, cfg_scale, num_inference_steps, seed, sampler_name):
    """Gradio handler for the Text-to-Image tab.

    Runs the pipeline with no input image. ``seed`` of ``-1`` (or an empty
    field) picks a random seed; other values are converted to ``int``.

    Raises:
        gr.Error: if the prompt is empty.
    """
    return _run_generation(
        prompt, negative_prompt, None, strength, cfg_scale,
        num_inference_steps, seed, sampler_name,
    )


def generate_img2img(prompt, negative_prompt, input_image, strength, cfg_scale, num_inference_steps, seed, sampler_name):
    """Gradio handler for the Image-to-Image tab.

    Same as ``generate_txt2img`` but forwards the uploaded ``input_image`` to
    the pipeline.

    Raises:
        gr.Error: if no input image was uploaded, or if the prompt is empty.
    """
    # The image check comes first so the user sees the same error order as before.
    if input_image is None:
        raise gr.Error("Please upload an input image")
    return _run_generation(
        prompt, negative_prompt, input_image, strength, cfg_scale,
        num_inference_steps, seed, sampler_name,
    )

def generate_inpaint(image, mask, prompt, negative_prompt, strength, cfg_scale, num_steps, seed, sampler_name):
    """Gradio handler for the Inpainting tab.

    Converts ``mask`` to single-channel ("L") mode, resizes it to the size of
    ``image``, runs ``inpainting.inpaint`` and resizes the result back to the
    original image size with LANCZOS resampling.

    Args:
        image: PIL image to inpaint.
        mask: PIL image used as the mask (the UI labels white as "inpaint").
        prompt: text prompt; must not be empty.
        negative_prompt: forwarded as ``uncond_prompt``.
        strength, cfg_scale, num_steps, sampler_name: forwarded unchanged.
        seed: ``-1`` or ``None`` (empty field) picks a random seed in
            ``[0, 999999]``; any other value is converted to ``int``.

    Returns:
        The inpainted result as a PIL image at the original size.

    Raises:
        gr.Error: if the image or mask is missing, or the prompt is empty.
    """
    if image is None or mask is None:
        raise gr.Error("Please upload both image and mask")
    if not (prompt or "").strip():
        raise gr.Error("Prompt is required")

    original_size = image.size
    mask_np = np.array(mask.convert("L").resize(original_size))

    # An emptied seed field arrives as None; treat it like -1 (random).
    if seed is None or seed == -1:
        seed = random.randint(0, 999999)
    else:
        seed = int(seed)

    result = inpainting.inpaint(
        prompt=prompt,
        image=image,
        mask=mask_np,
        uncond_prompt=negative_prompt,
        strength=strength,
        do_cfg=True,
        cfg_scale=cfg_scale,
        sampler_name=sampler_name,
        n_inference_steps=num_steps,
        models=models,
        tokenizer=tokenizer,
        seed=seed,
        device=DEVICE,
        idle_device="cpu"
    )
    result_image = Image.fromarray(result)
    return result_image.resize(original_size, resample=Image.LANCZOS)

# Available samplers
SAMPLERS = ["ddim", "ddpm", "euler"]

# Build the three-tab Gradio UI. Each tab wires its widgets to the matching
# generate_* handler; the order of `inputs` must match the handler signature.
# The custom CSS hides Gradio's built-in progress bar elements.
with gr.Blocks(css=".progress-bar, .svelte-1ipelgc {display: none !important;}") as demo:
    gr.Markdown("# Stable Diffusion All-in-One Demo")

    with gr.Tabs():
        with gr.Tab("Text-to-Image"):
            t2i_prompt = gr.Textbox(label="📝 Prompt", placeholder="Enter your prompt here...", lines=2)
            t2i_negative = gr.Textbox(label="🚫 Negative Prompt", placeholder="What to avoid in the image...", value="blurry, low quality, distorted", lines=1)
            with gr.Row():
                t2i_sampler = gr.Dropdown(choices=SAMPLERS, value="ddim", label="Sampler")
                t2i_strength = gr.Slider(0.1, 1.0, value=0.8, step=0.1, label="Strength")
            with gr.Row():
                t2i_cfg = gr.Slider(1, 14, value=8, step=1, label="CFG Scale")
                t2i_steps = gr.Slider(1, 50, value=20, step=1, label="Number of Inference Steps")
            # precision=0 makes the widget deliver an int seed (as the
            # Inpainting tab already does) instead of a float such as 42.0.
            t2i_seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
            t2i_btn = gr.Button("Generate", variant="primary")
            t2i_output = gr.Image(label="Generated Image")
            t2i_btn.click(
                generate_txt2img,
                inputs=[t2i_prompt, t2i_negative, t2i_strength, t2i_cfg, t2i_steps, t2i_seed, t2i_sampler],
                outputs=t2i_output,
            )

        with gr.Tab("Image-to-Image"):
            i2i_prompt = gr.Textbox(label="📝 Prompt", placeholder="Enter your prompt here...", lines=2)
            i2i_negative = gr.Textbox(label="🚫 Negative Prompt", placeholder="What to avoid in the image...", value="blurry, low quality, distorted", lines=1)
            i2i_image = gr.Image(label="Input Image", type="pil")
            with gr.Row():
                i2i_sampler = gr.Dropdown(choices=SAMPLERS, value="ddim", label="Sampler")
                i2i_strength = gr.Slider(0.1, 1.0, value=0.8, step=0.1, label="Strength")
            with gr.Row():
                i2i_cfg = gr.Slider(1, 14, value=8, step=1, label="CFG Scale")
                i2i_steps = gr.Slider(1, 50, value=20, step=1, label="Number of Inference Steps")
            # Integer seed, consistent with the other tabs.
            i2i_seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
            i2i_btn = gr.Button("Generate", variant="primary")
            i2i_output = gr.Image(label="Generated Image")
            i2i_btn.click(
                generate_img2img,
                inputs=[i2i_prompt, i2i_negative, i2i_image, i2i_strength, i2i_cfg, i2i_steps, i2i_seed, i2i_sampler],
                outputs=i2i_output,
            )

        with gr.Tab("Inpainting"):
            inp_image = gr.Image(label="Upload Your Image", type="pil")
            inp_mask = gr.Image(label="Draw Mask (white = inpaint)", type="pil")
            inp_prompt = gr.Textbox(label="📝 Prompt", lines=2, placeholder="e.g. a mountain with a castle")
            inp_negative = gr.Textbox(label="🚫 Negative Prompt", value="blurry, low quality", lines=1)
            with gr.Row():
                inp_sampler = gr.Dropdown(choices=SAMPLERS, value="ddim", label="Sampler")
                inp_strength = gr.Slider(0.1, 1.0, step=0.1, value=0.8, label="Strength")
            with gr.Row():
                inp_cfg = gr.Slider(1.0, 20.0, step=0.5, value=7.5, label="CFG Scale")
                inp_steps = gr.Slider(10, 100, step=5, value=30, label="Denoising Steps")
            inp_seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
            inp_btn = gr.Button("Generate", variant="primary")
            inp_output = gr.Image(label="🖼️ Output Image")
            inp_btn.click(
                generate_inpaint,
                inputs=[inp_image, inp_mask, inp_prompt, inp_negative, inp_strength, inp_cfg, inp_steps, inp_seed, inp_sampler],
                outputs=inp_output,
            )

# NOTE(review): share=True also publishes a temporary public *.gradio.live URL
# (visible in this cell's output), so anyone with the link can drive the
# model. Use share=False to keep the demo on the local URL only.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://2d6e7231fda5cd4c59.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/home/shora/Research/stable-diffusion/.venv/lib/python3.12/site-packages/gradio/queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/shora/Research/stable-diffusion/.venv/lib/python3.12/site-packages/gradio/route_utils.py", line 350, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/shora/Research/stable-diffusion/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2240, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/shora/Research/stable-diffusion/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 1747, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/shora/Research/stable-di

In [None]:
# Test inpainting functionality with negative prompts
# Smoke test: inpaints a white circle on a synthetic blue image and saves the
# result. Failures are reported with a traceback instead of stopping the run.
import os
import traceback

import numpy as np
from PIL import Image

print("🔧 Testing inpainting functionality with negative prompts...")

# Create a 512x512 test image (blue background)
test_image = np.full((512, 512, 3), [100, 150, 255], dtype=np.uint8)
test_image_pil = Image.fromarray(test_image)

# Create a simple mask (white circle in center for inpainting)
mask = np.zeros((512, 512), dtype=np.uint8)
center = 256
radius = 100
y, x = np.ogrid[:512, :512]
mask_circle = (x - center)**2 + (y - center)**2 <= radius**2
mask[mask_circle] = 255
mask_pil = Image.fromarray(mask)

test_prompt = "a beautiful red rose in the center"
test_negative = "blurry, low quality, distorted, cartoon"

output_path = "../data/outputs/inpainting_test_with_negative.png"

try:
    # Create the output directory before the expensive inference, so a
    # missing or unwritable folder fails fast rather than after the run.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    print("🎯 Running inpainting test...")
    result = inpainting.inpaint(
        prompt=test_prompt,
        image=test_image_pil,
        mask=mask,
        uncond_prompt=test_negative,
        strength=0.9,
        do_cfg=True,
        cfg_scale=7.5,
        sampler_name="ddim",
        n_inference_steps=15,
        models=models,
        tokenizer=tokenizer,
        seed=42,
        device=DEVICE,
        idle_device="cpu"
    )

    # Save the result
    result_image = Image.fromarray(result)
    result_image.save(output_path)
    print("✅ Inpainting test successful!")
    print(f"   📁 Result saved: {output_path}")

except Exception as e:
    # Deliberate best-effort: report the failure and keep the notebook running.
    print(f"❌ Inpainting test failed: {e}")
    traceback.print_exc()