[Stable Diffusion XL Base 1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lwieske/stable-diffusion-xl-demos/blob/main/stable_diffusion_xl_demos_colab.ipynb)

In [1]:
import os
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

!rm -rf "/content/drive/MyDrive/README.md"
!rm -rf "/content/drive/MyDrive/images"

!mkdir -p "/content/drive/MyDrive/images"

Mounted at /content/drive


In [2]:
!pip install --quiet --upgrade accelerate diffusers invisible_watermark mediapy transformers

In [3]:
!nvidia-smi

Sat Sep 30 22:37:16 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P8    14W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import random
import sys

import torch

import mediapy as media

from diffusers import DiffusionPipeline

from tqdm.notebook import tqdm

In [5]:
BASE_MODEL    = 'stabilityai/stable-diffusion-xl-base-1.0'

REFINER_MODEL = 'stabilityai/stable-diffusion-xl-refiner-1.0'

NUM_INFERENCE_STEPS  = 100
NUM_REFINEMENT_STEPS = 150

In [6]:
base_pipeline = DiffusionPipeline.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
base_pipeline = base_pipeline.to('cuda')

In [8]:
prompts = [
    'franoise nielly',
    'crowded beach scene',
    'market scene from a vendors perspective emphasizing colorful stalls and busy shoppers and an array of products',
    'draw data center in charcoal style',
    'paint data center in the style of van gogh',
    'Donald Trump in prison behind bars',
    'modern building with organic design in Hadid style',
    'Pope Francis mixing live as DJ in a nightclub',
    'fantasy medieval village world inside a glass sphere',
    'romantic painting of a ship sailing in a stormy sea with dramatic lighting and powerful waves',
    'black and white street photography of a rainy night in New York and reflections on wet pavement',
    'documentary style photography of a bustling marketplace with spices and textiles',
    'isometric digital art of a medieval village with thatched roofs and market square and townsfolk',
    'minimalist digital artwork with an abstract geometric pattern and a harmonious color palette',
    'abstract painting representing the sound of jazz music using vibrant colors and erratic shapes',
    'abstract painting representing the sound of rock music using vibrant colors and erratic shapes',
    'digital art with straight houses in foreground and background digital art with sharp focus in anime style',
    'gingerbread house diorama in focus with crunch cereal',
    'cartoon cow portrait digital art in high definition and Andy Warhol style',
    'dystopian image with earth in chain',
    'a majestic lion jumping from a big stone at night',
    'dagobert duck jumping into an ocean of screws',
]

In [9]:
base_images        = []
image_descriptions = []

for prompt in prompts:

    seed = random.randint(0, sys.maxsize)

    base_image = base_pipeline(
        prompt = prompt,
        output_type = 'latent',
        generator = torch.Generator("cuda").manual_seed(seed),
        num_inference_steps=NUM_INFERENCE_STEPS,
    ).images

    base_images += base_image

    file = prompt.replace(' ', '_')

    print(f"Prompt:\t{prompt}\nSeed:\t{seed}\nFile:\timages/{file}.png\n")

    image_descriptions.append({'Prompt': prompt, 'Seed': seed, 'File': file})

  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	franoise nielly
Seed:	622397290048936980
File:	images/franoise_nielly.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	crowded beach scene
Seed:	4969744290378521006
File:	images/crowded_beach_scene.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	market scene from a vendors perspective emphasizing colorful stalls and busy shoppers and an array of products
Seed:	4927185335424292438
File:	images/market_scene_from_a_vendors_perspective_emphasizing_colorful_stalls_and_busy_shoppers_and_an_array_of_products.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	draw data center in charcoal style
Seed:	7063583911781351740
File:	images/draw_data_center_in_charcoal_style.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	paint data center in the style of van gogh
Seed:	9095091719979286310
File:	images/paint_data_center_in_the_style_of_van_gogh.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	Donald Trump in prison behind bars
Seed:	5386717842849785265
File:	images/Donald_Trump_in_prison_behind_bars.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	modern building with organic design in Hadid style
Seed:	6001590354922436634
File:	images/modern_building_with_organic_design_in_Hadid_style.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	Pope Francis mixing live as DJ in a nightclub
Seed:	3382871710735566483
File:	images/Pope_Francis_mixing_live_as_DJ_in_a_nightclub.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	fantasy medieval village world inside a glass sphere
Seed:	4276734089794774005
File:	images/fantasy_medieval_village_world_inside_a_glass_sphere.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	romantic painting of a ship sailing in a stormy sea with dramatic lighting and powerful waves
Seed:	3345020636670136222
File:	images/romantic_painting_of_a_ship_sailing_in_a_stormy_sea_with_dramatic_lighting_and_powerful_waves.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	black and white street photography of a rainy night in New York and reflections on wet pavement
Seed:	5875644702106514845
File:	images/black_and_white_street_photography_of_a_rainy_night_in_New_York_and_reflections_on_wet_pavement.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	documentary style photography of a bustling marketplace with spices and textiles
Seed:	569624245665203172
File:	images/documentary_style_photography_of_a_bustling_marketplace_with_spices_and_textiles.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	isometric digital art of a medieval village with thatched roofs and market square and townsfolk
Seed:	927279715280377396
File:	images/isometric_digital_art_of_a_medieval_village_with_thatched_roofs_and_market_square_and_townsfolk.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	minimalist digital artwork with an abstract geometric pattern and a harmonious color palette
Seed:	6472590748610212580
File:	images/minimalist_digital_artwork_with_an_abstract_geometric_pattern_and_a_harmonious_color_palette.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	abstract painting representing the sound of jazz music using vibrant colors and erratic shapes
Seed:	1673163939761222425
File:	images/abstract_painting_representing_the_sound_of_jazz_music_using_vibrant_colors_and_erratic_shapes.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	abstract painting representing the sound of rock music using vibrant colors and erratic shapes
Seed:	4399694074144361114
File:	images/abstract_painting_representing_the_sound_of_rock_music_using_vibrant_colors_and_erratic_shapes.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	digital art with straight houses in foreground and background digital art with sharp focus in anime style
Seed:	8614524663522009705
File:	images/digital_art_with_straight_houses_in_foreground_and_background_digital_art_with_sharp_focus_in_anime_style.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	gingerbread house diorama in focus with crunch cereal
Seed:	6376822276508320053
File:	images/gingerbread_house_diorama_in_focus_with_crunch_cereal.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	cartoon cow portrait digital art in high definition and Andy Warhol style
Seed:	7712615263415609303
File:	images/cartoon_cow_portrait_digital_art_in_high_definition_and_Andy_Warhol_style.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	dystopian image with earth in chain
Seed:	5265381263075280832
File:	images/dystopian_image_with_earth_in_chain.png



  0%|          | 0/100 [00:00<?, ?it/s]

Prompt:	a majestic lion jumping from a big stone at night
Seed:	164983059114210558
File:	images/a_majestic_lion_jumping_from_a_big_stone_at_night.png



In [10]:
refiner_pipeline = DiffusionPipeline.from_pretrained(
    REFINER_MODEL,
    text_encoder_2=base_pipeline.text_encoder_2,
    vae=base_pipeline.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

In [11]:
refiner_pipeline = refiner_pipeline.to('cuda')

In [12]:
base_pipeline = None

torch.cuda.empty_cache()

In [14]:
for i in range(len(prompts)):

    prompt     = prompts[i]
    base_image = base_images[i]

    refiner_images = refiner_pipeline(
        prompt=prompt,
        image=base_image,
    ).images

    file = image_descriptions[i]['File']

    print(f"Prompt:\t{prompt}\n")

    media.write_image('/content/drive/MyDrive/images/' + file + '.png', refiner_images[0])

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

In [16]:
for image_description in image_descriptions:
    prompt = image_description['Prompt']
    seed   = image_description['Seed']
    file   = image_description['File']

    print(f"Prompt:\t{prompt}\nSeed:\t{seed}",          file=open('/content/drive/MyDrive/README.md', 'a'))
    print(f"![images/{file}.png](images/{file}.png)\n", file=open('/content/drive/MyDrive/README.md', 'a'))
