# 🎨 Stable Diffusion XL (SDXL) Image Generator

**Colab-Ready, Version-Locked (May 2025, tested).**

This notebook runs SDXL 1.0 with optional LoRA and Refiner support, served through a Gradio 4.44.x UI.

**Features:**
- High-res 1024x1024 SDXL images
- LoRA support (with HF model path)
- SDXL Refiner support
- Negative prompt
- Quantized loading for Colab GPUs
- Full dependency sanity-check (no conflicting installs)


In [None]:
# ----------------------------------------------------------
# 1. Clean and Install Compatible Packages (Colab Only!)
# ----------------------------------------------------------
# Remove cached wheels and any preinstalled copies so the pins below win.
!pip cache purge
!pip uninstall -y diffusers transformers huggingface_hub gradio fastapi pydantic websockets sentence-transformers peft xformers
!pip install --upgrade pip

# Install only compatible, tested versions.
# NOTE: pip options are global to the command, so `--index-url` would REPLACE
# PyPI for every package listed here and the pinned packages (diffusers,
# gradio, ...) would not be found on the PyTorch wheel index. Using
# `--extra-index-url` keeps PyPI as the primary index and adds the CUDA 11.8
# PyTorch wheels as an additional source within a single resolver pass.
!pip install -q \
  torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 \
  diffusers==0.27.2 \
  transformers==4.39.3 \
  huggingface_hub==0.22.2 \
  accelerate==0.28.0 \
  safetensors==0.4.2 \
  xformers==0.0.28.post1 \
  gradio==4.44.1 \
  fastapi==0.110.2 \
  pydantic==2.7.1 \
  bitsandbytes

## Import Dependencies

In [None]:
import torch
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
from transformers import BitsAndBytesConfig
import gradio as gr
from PIL import Image

## Load Models (Base & Refiner, Quantized)

In [None]:
# For memory efficiency, always use torch.float16 on Colab
# (fp16 is requested through `torch_dtype` in the loaders below).
#
# 8-bit bitsandbytes settings shared by both model loaders.
# `bnb_8bit_compute_dtype` is not a BitsAndBytesConfig option -- a compute
# dtype exists only for the 4-bit path (`bnb_4bit_compute_dtype`) -- so the
# stray keyword has been dropped.
# NOTE(review): confirm the pinned diffusers release actually honours
# `quantization_config` when passed to a pipeline's `from_pretrained`.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

def load_base_model():
    """Create the SDXL base pipeline and store it in the global ``pipe``.

    The pipeline is built from the fp16 variant of
    ``stabilityai/stable-diffusion-xl-base-1.0`` with a balanced device map
    and the module-level ``bnb_config`` forwarded as ``quantization_config``.
    xformers memory-efficient attention is enabled afterwards. Returns nothing.
    """
    global pipe
    repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
    load_options = {
        "torch_dtype": torch.float16,
        "variant": "fp16",
        "device_map": "balanced",
        "quantization_config": bnb_config,
    }
    # Bind the global first so it is set even if enabling xformers fails.
    pipe = StableDiffusionXLPipeline.from_pretrained(repo_id, **load_options)
    pipe.enable_xformers_memory_efficient_attention()

def load_refiner_model():
    """Create the SDXL refiner pipeline and store it in the global ``refiner``.

    The pipeline is built from the fp16 variant of
    ``stabilityai/stable-diffusion-xl-refiner-1.0`` with a balanced device map
    and the module-level ``bnb_config`` forwarded as ``quantization_config``.
    xformers memory-efficient attention is enabled afterwards. Returns nothing.
    """
    global refiner
    repo_id = "stabilityai/stable-diffusion-xl-refiner-1.0"
    load_options = {
        "torch_dtype": torch.float16,
        "variant": "fp16",
        "device_map": "balanced",
        "quantization_config": bnb_config,
    }
    # Bind the global first so it is set even if enabling xformers fails.
    refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(repo_id, **load_options)
    refiner.enable_xformers_memory_efficient_attention()

# Module-level pipeline handles; the loader functions rebind them via `global`.
pipe = None
# The refiner stays unloaded here; generate_image loads it the first time it is needed.
refiner = None
# Load the base pipeline as soon as this cell runs.
load_base_model()

## Image Generation Function

In [None]:
def generate_image(prompt, negative_prompt="", num_steps=30, guidance_scale=7.5,
                  use_refiner=False, refiner_steps=10, lora_model="", lora_scale=0.5,
                  advanced_mode=False):
    """Generate one image with the global SDXL pipeline.

    Args:
        prompt: Text prompt passed to the base pipeline (and to the refiner).
        negative_prompt: Negative prompt passed alongside ``prompt``.
        num_steps: Inference steps for the base pipeline. Rounded to an
            integer of at least 1, because UI sliders may deliver floats.
        guidance_scale: Guidance scale for the base pipeline.
        use_refiner: Run the refiner on the base image. Only honoured when
            ``advanced_mode`` is true.
        refiner_steps: Inference steps for the refiner, coerced like
            ``num_steps``.
        lora_model: LoRA identifier handed to ``pipe.load_lora_weights``.
            Blank or ``None`` means no LoRA. Only honoured when
            ``advanced_mode`` is true.
        lora_scale: LoRA strength, sent to the pipeline as
            ``cross_attention_kwargs={"scale": lora_scale}``.
        advanced_mode: Gate for the LoRA and refiner options.

    Returns:
        The generated image. On any failure the error is printed and a black
        512x512 placeholder image is returned instead of raising.
    """
    global pipe, refiner
    try:
        # Step counts must be integers; sliders can hand back floats.
        num_steps = max(1, int(round(num_steps)))
        refiner_steps = max(1, int(round(refiner_steps)))

        # Normalise the requested LoRA id; it only applies in advanced mode.
        lora_id = (lora_model or "").strip() if advanced_mode else ""
        # The id of the LoRA currently applied is remembered on the pipeline
        # object so it follows the pipeline if this function is redefined.
        active_lora = getattr(pipe, "_active_lora_id", "")

        if lora_id != active_lora:
            if active_lora:
                # Drop the previous LoRA so it cannot leak into this
                # generation (no LoRA requested, or a different one).
                pipe.unload_lora_weights()
                pipe._active_lora_id = ""
            if lora_id:
                try:
                    pipe.load_lora_weights(lora_id)
                except Exception as e:
                    print(f"LoRA loading error: {e}")
                    return Image.new("RGB", (512, 512), color="black")
                pipe._active_lora_id = lora_id

        cross_attention_kwargs = {"scale": lora_scale} if lora_id else {}

        # Generate base image
        image = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            cross_attention_kwargs=cross_attention_kwargs
        ).images[0]
        # Optionally refine
        if advanced_mode and use_refiner:
            if refiner is None:
                # Loaded on first use to avoid holding both models in memory
                # when the refiner is never requested.
                load_refiner_model()
            image = refiner(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=refiner_steps,
                image=image
            ).images[0]
        return image
    except Exception as e:
        # UI boundary: report the failure and hand back a placeholder rather
        # than propagating the exception to the caller.
        print(f"Error generating image: {e}")
        return Image.new("RGB", (512, 512), color="black")

## Gradio UI (Colab-Ready, Auto-Port, Debug Mode)

In [None]:
# Build the Gradio interface: prompt and sampling controls on the left,
# the generated image on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 Stable Diffusion XL 1.0 (May 2025)\nGenerate high-quality images from text.")
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="A haunted forest at dusk", lines=2)
            negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="blurry, low quality", lines=1)
            # step=1 keeps step counts integral.
            num_steps = gr.Slider(1, 100, value=30, step=1, label="Inference Steps")
            guidance_scale = gr.Slider(1.0, 20.0, value=7.5, label="Guidance Scale")
            advanced = gr.Checkbox(label="Advanced Mode (LoRA/Refiner)", value=False)
            # Advanced controls start hidden to match the unchecked
            # "Advanced Mode" box; the handler below toggles them.
            use_refiner = gr.Checkbox(label="Use Refiner", value=False, visible=False)
            refiner_steps = gr.Slider(1, 50, value=10, step=1, label="Refiner Steps", visible=False)
            lora_model = gr.Textbox(label="LoRA Model ID (HF repo)", placeholder="ostris/super-realistic-xl", visible=False)
            lora_scale = gr.Slider(0.0, 2.0, value=0.5, label="LoRA Scale", visible=False)
            advanced_controls = [use_refiner, refiner_steps, lora_model, lora_scale]
            # An event handler must return one value per output component,
            # so emit one visibility update for each advanced control.
            advanced.change(
                fn=lambda show: [gr.update(visible=show) for _ in advanced_controls],
                inputs=advanced,
                outputs=advanced_controls,
            )
            btn = gr.Button("Generate Image")
        with gr.Column(scale=2):
            output = gr.Image(label="Generated Image", type="pil")
    # Input order must match generate_image's positional parameters.
    btn.click(
        fn=generate_image,
        inputs=[prompt, negative_prompt, num_steps, guidance_scale, use_refiner, refiner_steps, lora_model, lora_scale, advanced],
        outputs=output,
    )
demo.launch(share=True, debug=True)
