In [None]:
!pip install diffusers transformers accelerate

In [None]:
import warnings

import numpy as np
import torch
from diffusers import DiffusionPipeline
from IPython.display import display
from PIL import Image

In [None]:
# Suppress all UserWarnings for the rest of the session.
warnings.simplefilter("ignore", category=UserWarning)

# `from_pretrained` has no `device` parameter: an unexpected `device='cuda'`
# keyword is ignored and the pipeline stays on the CPU. The device therefore
# has to be chosen here and applied explicitly with `.to(DEVICE)` below.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Half-precision weights are requested only when running on the GPU; on the
# CPU the default precision is kept.
PIPELINE_LOAD_KWARGS = {"use_safetensors": True}
if DEVICE == "cuda":
    PIPELINE_LOAD_KWARGS.update(torch_dtype=torch.float16, variant="fp16")

# Base pipeline: produces the latents that the refiner finishes.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    **PIPELINE_LOAD_KWARGS,
).to(DEVICE)

# Refiner pipeline: reuses the base pipeline's second text encoder and VAE
# instead of loading its own copies of those components.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    **PIPELINE_LOAD_KWARGS,
).to(DEVICE)

def generate(prompt_text, n_steps=40, high_noise_frac=0.8):
    """Generate an image for ``prompt_text`` with the base + refiner pipelines and display it.

    The module-level ``base`` pipeline is run first and asked for latents; the
    module-level ``refiner`` pipeline then continues from those latents and
    returns the final image, which is shown with ``display``.

    Args:
        prompt_text: Text prompt passed unchanged to both pipelines.
        n_steps: Value passed as ``num_inference_steps`` to both pipelines.
        high_noise_frac: Hand-over point between the two pipelines; passed as
            ``denoising_end`` to the base and ``denoising_start`` to the refiner.

    Returns:
        The first image returned by the refiner (the same object that was
        displayed). When calling this as the last expression of a cell, end the
        line with ``;`` to avoid the image being rendered a second time.
    """
    # Stage 1: the base pipeline stops at `high_noise_frac` and hands back
    # latents rather than a decoded image.
    latents = base(
        prompt=prompt_text,
        num_inference_steps=n_steps,
        denoising_end=high_noise_frac,
        output_type="latent",
    ).images

    # Stage 2: the refiner resumes from the same point on those latents.
    refined_image = refiner(
        prompt=prompt_text,
        num_inference_steps=n_steps,
        denoising_start=high_noise_frac,
        image=latents,
    ).images[0]  # the refiner returns a list; take the first image

    # Render the image inline in the notebook.
    display(refined_image)
    return refined_image



In [None]:
# User interaction loop: keep asking for prompts and rendering an image for
# each one until the user enters 'q'.
while True:
    try:
        # Strip surrounding whitespace so that e.g. "q " still quits instead of
        # being sent to the model as a prompt.
        prompt_text = input("Enter your prompt (or 'q' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the prompt was interrupted: leave the loop
        # cleanly instead of surfacing a traceback.
        break
    if prompt_text.lower() == 'q':
        break
    if not prompt_text:
        # Blank line: ask again rather than spending a full generation run on
        # an empty prompt.
        continue
    generate(prompt_text)