In [None]:
# for cloud notebooks, uncomment to install huggingface/diffusers
# %pip install diffusers transformers

In [None]:
import os

import numpy as np
import torch

from diffusers import StableDiffusionPipeline, AutoPipelineForImage2Image

from diffusers.pipelines.pipeline_utils import numpy_to_pil
from transformers import CLIPTokenizer, CLIPTextModel
from diffusers import AutoencoderKL, UNet2DConditionModel, \
        PNDMScheduler, LMSDiscreteScheduler

from PIL import Image
import matplotlib.pyplot as plt

In [None]:
def seed_all(seed):
    """Reseed the torch and numpy global random generators with `seed`.

    Args:
        seed: integer handed unchanged to torch.manual_seed and
            np.random.seed, in that order.
    """
    for reseed in (torch.manual_seed, np.random.seed):
        reseed(seed)

def grid_show(images, rows=3):
    """Tile equally sized PIL images into one array and display it.

    Images are placed row by row, left to right; grid cells without an
    image stay black.

    Args:
        images: non-empty sequence of PIL images that all share one size.
        rows: number of rows in the grid.

    Returns:
        (grid, fig, ax): the float array of shape
        (height*rows, width*columns, 3) holding the raw pixel values, plus
        the matplotlib figure and axes the normalised grid was drawn on.

    Raises:
        ValueError: if `images` is empty.
    """
    number_images = len(images)
    if number_images == 0:
        raise ValueError("grid_show needs at least one image")

    # PIL reports size as (width, height) -- the reverse of numpy's
    # (rows, columns) order -- so unpack it that way round; otherwise any
    # non-square image produces a mis-shaped grid.
    width, height = images[0].size

    columns = int(np.ceil(number_images / rows))
    grid = np.zeros((height * rows, width * columns, 3))
    for ii, image in enumerate(images):
        top = ii // columns * height
        left = ii % columns * width
        # force three channels so grayscale / palette / RGBA images still fit
        grid[top:top + height, left:left + width] = np.asarray(image.convert("RGB"))

    fig, ax = plt.subplots(1, 1, figsize=(3 * columns, 3 * rows))
    # scale into [0, 1] for imshow; an all-black grid is shown as-is rather
    # than being divided by zero
    peak = grid.max()
    ax.imshow(grid / peak if peak > 0 else grid)
    return grid, fig, ax

def callback_stash_latents(ii, tt, latents):
    """Decode the given latents to a picture and stash it in `images`.

    Reads two notebook globals: `pipe` (its VAE does the decoding and its
    numpy_to_pil does the conversion) and `images` (the list that receives
    the result).

    Args:
        ii: unused here; presumably the step index passed by the pipeline.
        tt: unused here; presumably the timestep passed by the pipeline.
        latents: latent tensor to decode; only element 0 of the decoded
            batch is kept.
    """
    # adapted from https://github.com/fastai/diffusion-nbs/stable_diffusion.ipynb
    unscaled = 1.0 / 0.18215 * latents
    decoded = pipe.vae.decode(unscaled).sample[0]
    # halve and shift by 0.5, move axis 0 to the end (assumed channels-first
    # -> channels-last), then clamp into [0, 1]
    pixels = (decoded / 2. + 0.5).cpu().permute(1,2,0).numpy()
    pixels = np.clip(pixels, 0, 1.0)
    images.extend(pipe.numpy_to_pil(pixels))

# Seed shared by every cell that calls seed_all().
my_seed = 27
# np.random.seed only accepts integers in [0, 2**32 - 1], so check both ends.
assert 0 <= my_seed < 2**32, "my_seed must be in the range [0, 2**32 - 1]"

In [None]:
# Change `pipe_choice` to run on CPU, or to choose another model.
#   "sd14-gpu": CompVis/stable-diffusion-v1-4, float16 weights, on CUDA
#   "sd14-cpu": CompVis/stable-diffusion-v1-4, float32 weights, on CPU
#   "sdxl-cpu": stabilityai/stable-diffusion-xl-base-1.0, float32, on CPU
pipe_choice = "sd14-gpu"

# Only the fp16 GPU setup requests a specific weight variant.
my_variant = None

if pipe_choice == "sd14-gpu":
    pipe_name = "CompVis/stable-diffusion-v1-4"
    my_dtype = torch.float16
    my_device = torch.device("cuda")
    my_variant = "fp16"
    pipe = StableDiffusionPipeline.from_pretrained(
        pipe_name, safety_checker=None, variant=my_variant, torch_dtype=my_dtype
    ).to(my_device)
elif pipe_choice == "sd14-cpu":
    pipe_name = "CompVis/stable-diffusion-v1-4"
    my_dtype = torch.float32
    my_device = torch.device("cpu")
    pipe = StableDiffusionPipeline.from_pretrained(
        pipe_name, torch_dtype=my_dtype
    ).to(my_device)
elif pipe_choice == "sdxl-cpu":
    # SDXL checkpoints need the SDXL pipeline class; StableDiffusionPipeline
    # is for the v1/v2 model layout.
    from diffusers import StableDiffusionXLPipeline

    # Runs on CPU in float32 as configured here; switch my_device to "cuda"
    # (and keep an eye on VRAM) to run it on a GPU instead.
    # NOTE(review): callback_stash_latents hard-codes the 0.18215 latent
    # scale -- confirm it is right for SDXL before relying on its previews.
    pipe_name = "stabilityai/stable-diffusion-xl-base-1.0"
    my_dtype = torch.float32
    my_device = torch.device("cpu")
    pipe = StableDiffusionXLPipeline.from_pretrained(
        pipe_name, torch_dtype=my_dtype
    ).to(my_device)
else:
    raise ValueError(f"unknown pipe_choice: {pipe_choice!r}")

# Pareidolia: Finding Patterns in Noise

[https://en.wikipedia.org/wiki/Pareidolia](https://en.wikipedia.org/wiki/Pareidolia)

In [None]:
# Each entry is (file-name prefix, prompt). Every prompt gets its own prefix
# so one run's images never overwrite another's on disk.
pareidolia_prompts = [
    ("rorschach",
     "Rorschach test ink blot looks like emoji"),
    ("latte",
     "😬 inadvertent 😬 latte art face, accidental latte art, cozy, detail, intricate, cafe, coffee"),
    ("toast",
     "piece of toasted bread bears the likeness of a realistic human face on golden-brown burnt toast surface, shroud"),
]

for file_prefix, my_prompt in pareidolia_prompts:
    # reseed before every prompt so each run starts from the same RNG state
    seed_all(my_seed)
    my_output = pipe(my_prompt, num_inference_steps=50, num_images_per_prompt=9, guidance_scale=9.0)
    temp = grid_show(my_output.images, rows=3)
    plt.show()

    for ii, img in enumerate(my_output.images):
        img.save(f"{file_prefix}_{my_seed}_{ii}.jpg")

# Latent Diffusion: The Pieces

In [None]:
# Assemble the individual latent-diffusion components by hand.
# Statement order matters here: the seed is set first and the random latents
# are drawn last, exactly as before.
prompt = ["Robotic lunar rover, NASA, JPL/Caltech"]
seed_all(my_seed)

# image settings
height = 512
width = 512

# diffusion settings
number_inference_steps = 64
guidance_scale = 9.0
batch_size = 1

# diffusion pieces
vae = AutoencoderKL.from_pretrained(
    "CompVis/stable-diffusion-v1-4", subfolder="vae"
)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
unet = UNet2DConditionModel.from_pretrained(
    "CompVis/stable-diffusion-v1-4", subfolder="unet"
)
# PNDMScheduler is imported as an alternative to try here
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)
scheduler.set_timesteps(number_inference_steps)

# move the three networks onto the chosen device
vae = vae.to(my_device)
text_encoder = text_encoder.to(my_device)
unet = unet.to(my_device)

# one copy of the prompt per batch element
prompt = prompt * batch_size

# tokenize the prompt and an equally long batch of empty prompts, both
# padded/truncated to the tokenizer's maximum length
tokens = tokenizer(
    prompt,
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=True,
    return_tensors="pt",
)
empty_tokens = tokenizer(
    [""] * batch_size,
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=True,
    return_tensors="pt",
)

# initialize text embeddings
with torch.no_grad():
    text_embeddings = text_encoder(tokens.input_ids.to(my_device))[0]
    max_length = tokens.input_ids.shape[-1]

    notext_embeddings = text_encoder(empty_tokens.input_ids.to(my_device))[0]

# empty-prompt embeddings first, prompt embeddings second; the diffusion
# loop splits the model output in the same order
text_embeddings = torch.cat([notext_embeddings, text_embeddings])

# initialize latent space: random values at 1/8 of the image resolution,
# scaled by the scheduler's initial noise sigma, then moved to the device
latents = (
    torch.randn(batch_size, unet.config.in_channels, height//8, width//8)
    * scheduler.init_noise_sigma
).to(my_device)

In [None]:
images = []
# Decode a preview about eight times over the run. max(1, ...) keeps the
# modulo below from dividing by zero when fewer than 8 steps are requested.
display_every = max(1, number_inference_steps // 8)
final_step_idx = len(scheduler.timesteps) - 1

# NOTE: this cell updates `latents` in place of the previous value, so re-run
# the setup cell before running the loop a second time.

# diffusion loop; nothing in it needs gradients
with torch.no_grad():
    for step_idx, timestep in enumerate(scheduler.timesteps):
        # two copies of the latents in one batch, matched against the two
        # halves of text_embeddings
        model_in = torch.cat([latents] * 2)
        model_in = scheduler.scale_model_input(model_in, timestep).to(my_device)

        predicted_noise = unet(model_in, timestep,
                encoder_hidden_states=text_embeddings).sample

        # pnu - empty prompt unconditioned noise prediction
        # pnc - text prompt conditioned noise prediction
        pnu, pnc = predicted_noise.chunk(2)
        # weight noise predictions
        predicted_noise = pnu + guidance_scale * (pnc - pnu)

        # update the latents
        latents = scheduler.step(predicted_noise, timestep, latents).prev_sample

        # keep a decoded preview every `display_every` steps and always for
        # the final step
        if step_idx % display_every == 0 or step_idx == final_step_idx:
            # 0.18215 is the same latent scale used in callback_stash_latents
            image = vae.decode(latents / 0.18215).sample[0]
            image = ((image / 2.) + 0.5).cpu().permute(1,2,0).numpy()
            image = np.clip(image, 0, 1.0)

            images.extend(numpy_to_pil(image))

            print(f"step {step_idx}/{number_inference_steps}: {timestep:.4f}")


In [None]:
# Tile the stashed previews; grid_show returns (grid, fig, ax), so the figure
# it just drew is temp[1].
temp = grid_show(images)
temp[1].savefig("pieces.jpg")
# the last stashed preview is the fully denoised picture
final_preview = images[-1]
final_preview.save("lunar_rover_pieces.jpg")

# The Pipeline

In [None]:
my_prompt = "Artist's impression of first astronaut on Mars giving a thumbs-up 👍 after discovering fungoid alien Martian life"\
        ", hyper-realistic, realism, retro-futuristic, intricate, detailed, golden hour"

seed_all(my_seed)

# callback_stash_latents appends its decoded previews to this global list
images = []
# The pipeline's guidance parameter is `guidance_scale`; a bare `guidance`
# keyword is not one of its arguments, so the intended 8.0 never took effect.
# callback_steps=6 asks for the callback on every sixth step.
my_output = pipe(my_prompt, num_inference_steps=50, callback=callback_stash_latents, \
        callback_steps=6, num_images_per_prompt=1, guidance_scale=8.0)

# finish the sequence with the pipeline's own final image
images.append(my_output.images[0])

In [None]:
# Three-row grid of the stashed previews; temp[1] is the figure that
# grid_show drew.
temp = grid_show(images, rows=3)
temp[1].savefig("astro_denoising.jpg")
plt.show()

# also keep the pipeline's final image on its own
denoised = my_output.images[0]
denoised.save("denoised_astro.jpg")

In [None]:
# Same prompt from a fresh seed, this time as a batch of nine images.
seed_all(my_seed)
my_output_astro = pipe(
    my_prompt,
    num_inference_steps=50,
    num_images_per_prompt=9,
)

In [None]:
# Show the batch as a grid, then write every image to its own file.
astro_batch = my_output_astro.images
temp = grid_show(astro_batch)
plt.show()
for ii, img in enumerate(astro_batch):
    img.save(f"human_astro_{my_seed}_{ii}.jpg")

# Negative Prompts

Tell the diffusion pipeline what to avoid for better control.

In [None]:
# Same astronaut prompt as before (adjacent literals are joined into one string).
my_prompt = (
    "Artist's impression of first astronaut on Mars giving a thumbs-up 👍 after discovering fungoid alien Martian life"
    ", hyper-realistic, realism, retro-futuristic, intricate, detailed, golden hour"
)

# What the pipeline should steer away from.
my_negative_prompt = "human, astronaut, person, man, woman, Earthling"


In [None]:
# Nine images from the same seed and prompt, now with the negative prompt.
seed_all(my_seed)
my_output_alien = pipe(
    my_prompt,
    num_inference_steps=50,
    num_images_per_prompt=9,
    negative_prompt=my_negative_prompt,
)

In [None]:
# Show the negative-prompt batch as a grid, then save each image.
alien_batch = my_output_alien.images
temp = grid_show(alien_batch)
plt.show()

for ii, img in enumerate(alien_batch):
    img.save(f"nonhuman_astro_{my_seed}_{ii}.jpg")

# Guidance

Control how the latents are updated by changing how strongly the text-conditioned noise prediction is weighted against the unconditioned (empty-prompt) one.

In [None]:
# Sweep the guidance scale with the seed and prompt held fixed, keeping one
# image per setting.
guidance_images = []
guidance_values = (0.25, 0.5, 1., 2.0, 4.0, 6.0, 8.0, 10.0, 14.0)

for guidance in guidance_values:
    # reseed so that only the guidance scale differs between runs
    seed_all(my_seed)
    my_output = pipe(
        my_prompt,
        num_inference_steps=50,
        num_images_per_prompt=1,
        guidance_scale=guidance,
    )
    guidance_images.append(my_output.images[0])

    # int(guidance*2) gives each setting an integer tag in the file name
    for ii, img in enumerate(my_output.images):
        img.save(f"mice_{my_seed}_g{int(guidance*2)}_tea3_{ii}.jpg")

# grid_show returns (grid, fig, ax); temp[1] is the figure it drew
temp = grid_show(guidance_images, rows=3)
temp[1].savefig("cozy_mice_guidance.jpg")
plt.show()

# Starting from an Initial Image

Use an initial image as a sketch for inpainting, outpainting, image-to-image variation, etc. 

In [None]:
# Image-to-image pipeline, loaded with the dtype and device chosen earlier
# in the notebook.
pipe_img2img = AutoPipelineForImage2Image.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=my_dtype,
    use_safetensors=True,
)
pipe_img2img = pipe_img2img.to(my_device)

# Image variation


In [None]:
# Fetch the source picture once; later runs reuse the local copy.
trappist_file = "600px-TRAPPIST-1e_artist_impression_2018.png"
if not os.path.exists(trappist_file):
    os.system("wget 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/TRAPPIST-1e_artist_impression_2018.png/600px-TRAPPIST-1e_artist_impression_2018.png'")

# Shrink to 128x128 and blow back up to 512x512, which throws away fine detail.
trappist_small = Image.open(trappist_file).resize((128,128))
init_image = trappist_small.resize((512,512))

init_image

In [None]:
seed_all(my_seed)

# Adjacent string literals are concatenated with nothing in between, so each
# piece must carry its own separator. The last two pieces used to fuse into
# "TRAPPIST-1NASA" and the tag list contained an empty ", ," item.
trappist_prompt = "Artist's impression of TRAPPIST-1e, a rocky water-world exoplanet ocean-bearing world,"\
        " orbiting within the habitable (or Goldilocks) zone"\
        " of the ultracool dwarf star TRAPPIST-1,"\
        " NASA, artist concept, detailed, intricate, art, reconstruction"

# nine variations that start from init_image rather than from pure noise
my_output_trappist1e = pipe_img2img(prompt=trappist_prompt, num_images_per_prompt=9, \
        image=init_image, guidance_scale=5.0)

grid_show(my_output_trappist1e.images)
plt.show()

for ii, img in enumerate(my_output_trappist1e.images):
    img.save(f"trappist1e_waterocean_{my_seed}_{ii}.jpg")

# Use an initial image as a starting sketch

In [None]:
import os
# https://www.jpl.nasa.gov/news/chasing-oumuamua
# Fetch the animation once; later runs reuse the local copy.
oumuamua_file = "imagesasteroid20180627Oumuamua.2e16d0ba.fill-400x400-c50.gif"
if not os.path.exists(oumuamua_file):
    os.system("wget 'https://d2pn8kiwq2w21t.cloudfront.net/images/imagesasteroid20180627Oumuamua.2e16d0ba.fill-400x400-c50.gif'")

init_image = Image.open(oumuamua_file)

In [None]:
# Use frame 80 of the animation as the starting sketch. The GIF is re-opened
# here so the cell can be re-run: once init_image has been replaced by the
# resized still, a second seek(80) on it would raise EOFError.
with Image.open("imagesasteroid20180627Oumuamua.2e16d0ba.fill-400x400-c50.gif") as oumuamua_gif:
    oumuamua_gif.seek(80)
    # GIF frames may be palette or RGBA images; convert to plain RGB.
    # NOTE(review): assumes the img2img pipeline wants a 3-channel input -- confirm.
    init_image = oumuamua_gif.resize((512,512)).convert("RGB")


In [None]:
seed_all(my_seed)

# adjacent literals are joined into a single prompt string
oumuamua_prompt = (
    "Interstellar object"
    " Oumuamua is an elongated alien spacecraft"
    " artist concept, reconstruction, realistic render, NASA/JPL-Caltech "
)
# note: this replaces the negative prompt defined in the earlier section
my_negative_prompt = "normal asteroid"

# nine images that start from init_image, steered away from the negative prompt
oumuamua = pipe_img2img(
    prompt=oumuamua_prompt,
    num_images_per_prompt=9,
    image=init_image,
    guidance_scale=10.0,
    negative_prompt=my_negative_prompt,
)

grid_show(oumuamua.images)
plt.show()

for ii, img in enumerate(oumuamua.images):
    img.save(f"oumuamua_{my_seed}_{ii}.jpg")

# Change the style of an image

In [None]:
# https://photojournal.jpl.nasa.gov/catalog/PIA04413

# Fetch the source photo once; later runs reuse the local copy.
if not (os.path.exists("300px-NASA_Mars_Rover.jpg")):
    os.system("wget 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/d8/NASA_Mars_Rover.jpg/300px-NASA_Mars_Rover.jpg'")

# take the top-left 256x256 crop and enlarge it to 512x512
init_image = Image.open("300px-NASA_Mars_Rover.jpg").crop((0,0,256,256)).resize((512,512))

seed_all(my_seed)

rover_prompt = "Cute cartoon watercolor of NASA's Mars Opportunity rover, doing a good job on Mars"\
        ", cozy, space, NASA, watercolour, art"

rover_wc = pipe_img2img(prompt=rover_prompt, num_images_per_prompt=9, \
            image=init_image, guidance_scale=10.0)

grid_show(rover_wc.images)
plt.show()

# the colormap is only needed for the arrow colour, so look it up once
my_cmap = plt.get_cmap("plasma")

for ii, img in enumerate(rover_wc.images):
    img.save(f"rover_wc_{my_seed}_{ii}.jpg")

    # Before/after figure. The arrow between the panels points left to right,
    # so the initial image goes on the left and the result on the right.
    fig, ax = plt.subplots(1,2, figsize=(8,4))
    ax[0].imshow(init_image)
    ax[0].set_title("Initial image")
    ax[1].imshow(img)
    ax[1].set_title("After diffusion (watercolor)")

    fig.text(.44, .35, "→", color=my_cmap(192), fontsize=128)

    # blank out the tick labels on both panels
    for idx in range(2):
        ax[idx].set_yticklabels("")
        ax[idx].set_xticklabels("")

    plt.show()