<a href="https://colab.research.google.com/github/karaage0703/stable-diffusion-colab-tools/blob/main/002_latents_space_walker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stable Diffusion Latents Space walker
Take a random walk through the latent space of Stable Diffusion.

GitHub repository is below:
[stable-diffusion-colab-tools](https://github.com/karaage0703/stable-diffusion-colab-tools)

In [None]:
#@title **Hugging Face Login**
#@markdown You need a Hugging Face access token.

!pip -qq install diffusers==0.7.2
!pip -qq install transformers
!pip install -qq tqdm

from huggingface_hub import notebook_login
notebook_login()

In [None]:
#@title **Setup**
#@markdown Run this cell to set up the pipeline.

import torch
from diffusers import StableDiffusionPipeline
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt

device = "cuda"
model_id = "CompVis/stable-diffusion-v1-4"

# Fail fast with an actionable message when no GPU is attached, instead of
# loading the model first and only failing when it is moved to `device`.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. In Colab, select Runtime > Change runtime type > GPU "
        "and run this cell again."
    )

# Load the half-precision ("fp16" revision) weights, authenticating with the
# Hugging Face token, and move the whole pipeline to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    revision="fp16",
    torch_dtype=torch.float16,
    use_auth_token=True,
).to(device)

In [None]:
#@title **Connect Google Drive**
#@markdown Enter the output directory, then run this cell to connect Google Drive.
import os
from pathlib import Path
from google.colab import drive
from datetime import datetime
from pytz import timezone

# Mount Google Drive at /content/drive; the output directory below lives under it.
drive.mount('/content/drive')

save_dir_name = '/content/drive/MyDrive/stable_diffusion/output' #@param {type:"string"}

# Every run writes into its own sub-directory named after the current
# Asia/Tokyo time, so separate runs do not share a folder.
run_stamp = datetime.now(timezone('Asia/Tokyo')).strftime("%Y-%m-%d-%H-%M-%S")
OUTPUT_DIR = Path(save_dir_name) / run_stamp

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
#@title **Generate Image**
#@markdown Enter the parameters (note: a negative seed such as -1 picks a random seed).

# Generation settings, exposed as Colab form fields through the `#@param` markers.
# They are passed to infer() below and reused by the random-walk cell.
prompt = 'karaage' #@param {type:"string"}
# A negative seed_number makes infer() draw a random seed instead of using this value.
seed_number = 42 #@param
num_inference_steps  = 50 #@param {type:"slider", min:1, max:200, step:1}
guidance_scale_value = 7.5 #@param {type:"slider", min:1, max:20, step:0.1}
# infer() sizes the initial latent tensor as (height_image // 8) x (width_image // 8).
width_image = 512 #@param {type:"slider", min:64, max:640, step:64}
height_image = 512 #@param {type:"slider", min:64, max:640, step:64}

def infer(prompt, seed_number, num_inference_steps, guidance_scale_value, width_image, height_image):
    """Generate one image from freshly sampled noise and return both.

    Args:
        prompt: Text prompt handed to the pipeline.
        seed_number: Seed for the initial noise. Any negative value requests a
            random seed, which is printed so the run can be repeated.
        num_inference_steps: Forwarded to the pipeline as `num_inference_steps`.
        guidance_scale_value: Forwarded to the pipeline as `guidance_scale`.
        width_image: Output width; the latent width is `width_image // 8`.
        height_image: Output height; the latent height is `height_image // 8`.

    Returns:
        A tuple `(image, image_latent)`: the first image produced by the
        pipeline and the noise tensor of shape
        `(1, pipe.unet.in_channels, height_image // 8, width_image // 8)`
        it was generated from.
    """
    generator = torch.Generator(device=device)

    # Resolve the seed: draw a fresh one for negative input, otherwise use the
    # caller's value as-is.
    if seed_number < 0:
        seed = generator.seed()
        # Report the drawn seed; without this a random run cannot be reproduced.
        print('Using random seed: {}'.format(seed))
    else:
        seed = seed_number

    generator = generator.manual_seed(seed)

    image_latent = torch.randn(
        (1, pipe.unet.in_channels, height_image // 8, width_image // 8),
        generator=generator,
        device=device,
    )

    # Rendering from a latent tensor is exactly what draw_image_from_latents()
    # does, so delegate to it instead of repeating the pipeline call here.
    image = draw_image_from_latents(
        prompt,
        num_inference_steps,
        guidance_scale_value,
        width_image,
        height_image,
        image_latent,
    )

    return image, image_latent

def draw_image_from_latents(prompt, num_inference_steps, guidance_scale_value, width_image, height_image, image_latent):
    """Run the pipeline on a caller-supplied latent tensor and return the first image.

    Args:
        prompt: Text prompt handed to the pipeline.
        num_inference_steps: Forwarded to the pipeline as `num_inference_steps`.
        guidance_scale_value: Forwarded to the pipeline as `guidance_scale`.
        width_image: Forwarded to the pipeline as `width`.
        height_image: Forwarded to the pipeline as `height`.
        image_latent: Tensor forwarded to the pipeline as `latents`.

    Returns:
        The first entry of the pipeline result's `images`.
    """
    sampling_options = dict(
        width=width_image,
        height=height_image,
        guidance_scale=guidance_scale_value,
        num_inference_steps=num_inference_steps,
        latents=image_latent,
    )

    # The pipeline call itself runs under CUDA autocast.
    with torch.autocast('cuda'):
        result = pipe([prompt], **sampling_options)

    return result.images[0]

def draw_image(image):
    """Display `image` on a 10x10-inch matplotlib figure with the axes hidden.

    Args:
        image: Anything accepted by matplotlib's `imshow`.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)
    ax.axis('off')
    plt.show()
    # This function is called once per frame in the random-walk loop; close the
    # figure explicitly so figures do not pile up.
    plt.close(fig)

# Render the starting image and keep the noise tensor it was generated from.
image, latents = infer(
    prompt=prompt,
    seed_number=seed_number,
    num_inference_steps=num_inference_steps,
    guidance_scale_value=guidance_scale_value,
    width_image=width_image,
    height_image=height_image,
)

draw_image(image)

In [None]:
#@title **Random walking and save images**
#@markdown Run this cell to generate the images.

number_frames = 20 #@param
max_distance = 0.1 #@param {type:"slider", min:0.01, max:0.5, step:0.01}
random_walk = np.random.default_rng()
save_image = True #@param {type:"boolean"}

# Rendered frames as uint8 arrays; the movie cells read this list.
image_cv = []

for n in tqdm(range(number_frames)):
    # Random-walk step: shift every value of latents[0] by an independent draw
    # from the uniform range [-max_distance, max_distance). All offsets for a
    # frame are drawn in one NumPy call and added with one tensor operation,
    # instead of updating the tensor element by element from Python.
    # `latents` is modified in place, so the walk accumulates from frame to
    # frame, and re-running this cell continues from the current position.
    step = random_walk.uniform(-max_distance, max_distance, size=tuple(latents.shape[1:]))
    latents[0] += torch.from_numpy(step).to(device=latents.device, dtype=latents.dtype)

    image = draw_image_from_latents(prompt, num_inference_steps, guidance_scale_value, width_image, height_image, latents)
    print('below image is number ' + str(n))
    draw_image(image)
    file_path = os.path.join(OUTPUT_DIR, 'sd_{}.png'.format(n))
    if save_image:
        image.save(file_path)
    image_cv.append(np.array(image, dtype=np.uint8))

In [None]:
#@title **Make movie**
#@markdown Set the interval between frames in milliseconds.
interval = 500 #@param

import matplotlib.pyplot as plt
import matplotlib.animation
from IPython.display import HTML

# Raise matplotlib's embed limit to a huge value so the animation is not cut
# short when it is embedded as HTML.
matplotlib.rcParams['animation.embed_limit'] = 2**128

# Size the figure so that, at 72 dpi, it has the pixel dimensions of a frame.
first_frame = image_cv[0]
frame_height, frame_width = first_frame.shape[0], first_frame.shape[1]
plt.figure(figsize=(frame_width / 72.0, frame_height / 72.0), dpi = 72)
patch = plt.imshow(first_frame)
plt.axis('off')


def animate(i):
    """Swap the displayed pixels for frame `i` of `image_cv`."""
    return patch.set_data(image_cv[i])


ani = matplotlib.animation.FuncAnimation(plt.gcf(), animate, frames=len(image_cv), interval = interval)
HTML(ani.to_jshtml())

In [None]:
#@title **Save movie file**

save_filename = 'stable_diffusion_movie' #@param {type:"string"}

# The movie is written next to the frame images, inside this run's output directory.
movie_name = '{}.mp4'.format(save_filename)
file_movie_path = os.path.join(OUTPUT_DIR, movie_name)

ani.save(file_movie_path)

## Reference
Special Thanks
- https://colab.research.google.com/github/pcuenca/diffusers-examples/blob/main/notebooks/stable-diffusion-seeds.ipynb
- https://colab.research.google.com/github/nakamura196/ndl_ocr/blob/main/ndl_ocr_v2.ipynb