# **Stable diffusion noise vector**

Create noise vectors for each artist listed at https://gorgeous.adityashankar.xyz/

## Check Environment

In [None]:
import os

# Identify the hosting platform from marker environment variables.
# Markers are checked in order and the first one present wins; with no
# marker set the run is treated as local.
_ENV_MARKERS = (("COLAB_GPU", "colab"), ("KAGGLE_URL_BASE", "kaggle"))
env = next((label for marker, label in _ENV_MARKERS if marker in os.environ), "local")
print(f"Environment is {env}")

Environment is colab


### Set ENV_TYPE
If ENV_TYPE is equal to "TEST", the notebook runs on a small sample of the data to check that it executes correctly.
Otherwise, the notebook runs on the whole dataset.

In [None]:
# Run-mode switch: "TEST" uses local paths and a short artist list;
# any other value clones the repository and processes every artist.
ENV_TYPE = "TEST"

In [None]:
import os
import sys

# Choose where the model weights and the project sources live, depending on the run mode.
if ENV_TYPE == "TEST":
    # TEST mode: nothing is cloned or downloaded; the weights are expected in
    # `model_path` and the parent directory is used as the project root.
    # NOTE(review): `model_path` is absolute while the artist list is later read from
    # the relative "../input/artists.txt" - confirm the leading "/" is intended.
    model_path = "/input/models"
    base_directory = "../"
else:
    # clone the repository
    !git clone https://github.com/kk-digital/kcg-ml-sd1p4.git

    # move to the repo
    %cd kcg-ml-sd1p4/

    # The checkpoint is downloaded into the current (repository) directory below
    model_path = "./"
    # Get the current directory
    base_directory = os.getcwd()
    base_directory = os.path.join(base_directory, 'kcg-ml')
    # download model weights
    !wget https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt

# Construct the paths based on the current directory
stable_diffusion_path = os.path.join(base_directory, 'stable_diffusion')

# Insert the paths into sys.path
# NOTE(review): this puts the `stable_diffusion` directory itself on sys.path, while
# later cells import `stable_diffusion.*` as a package, which needs the directory that
# CONTAINS `stable_diffusion` to be importable - confirm this entry is what is wanted.
sys.path.insert(0, stable_diffusion_path)

--2023-05-28 08:57:35--  https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt
Resolving huggingface.co (huggingface.co)... 18.155.68.116, 18.155.68.44, 18.155.68.121, ...
Connecting to huggingface.co (huggingface.co)|18.155.68.116|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/4c/37/4c372b4ebb57bbd02e68413d4951aa326d4b3cfb6e62db989e529c6d4b26fb21/fe4efff1e174c627256e44ec2991ba279b3816e364b49f9be2abc0b3ff3f8556?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27sd-v1-4.ckpt%3B+filename%3D%22sd-v1-4.ckpt%22%3B&Expires=1685519234&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzRjLzM3LzRjMzcyYjRlYmI1N2JiZDAyZTY4NDEzZDQ5NTFhYTMyNmQ0YjNjZmI2ZTYyZGI5ODllNTI5YzZkNGIyNmZiMjEvZmU0ZWZmZjFlMTc0YzYyNzI1NmU0NGVjMjk5MWJhMjc5YjM4MTZlMzY0YjQ5ZjliZTJhYmMwYjNmZjNmODU1Nj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NU

# **Install the requirements**

In [None]:
!pip3 install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting accelerate==0.19.0 (from -r requirements.txt (line 1))
  Downloading accelerate-0.19.0-py3-none-any.whl (219 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m219.1/219.1 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting diffusers==0.16.1 (from -r requirements.txt (line 2))
  Downloading diffusers-0.16.1-py3-none-any.whl (934 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m934.9/934.9 kB[0m [31m63.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops==0.6.1 (from -r requirements.txt (line 3))
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Flask==2.3.2 (from -r requirements.txt (line 5))
  Downloading Flask-2.3.2-py3-none-any.whl (96 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# **Computing Platform Check GPU (CUDA) or CPU / Environment**


In [None]:
import torch
# Select the compute device once; warn when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print ('[WARNING] CUDA/GPU is not available! Compute-intensive scripts on this notebook will be run on CPU.')

# **Import the Module/Utility**

In [None]:
import random
import sys
import importlib
from PIL import Image
import os
import time
import json

from stable_diffusion.sampler.ddim import DDIMSampler
from stable_diffusion.sampler.ddpm import DDPMSampler
from stable_diffusion.util import load_model, save_images, set_seed
from stable_diffusion.model.unet_attention import CrossAttention
from stable_diffusion.util import save_images, set_seed

In [None]:
from stable_diffusion.latent_diffusion import LatentDiffusion
from pathlib import Path

class Txt2Img:
    """
    ### Text to image class

    Bundles a latent diffusion model with a sampler. It can either run the whole
    text-to-image generation in one call (`__call__`) or run it stage by stage
    (`generate_text_embeddings` -> `generate_latent_space` -> `generate_image`).
    """
    # Quoted so that defining the class does not require the name at class-creation time
    model: "LatentDiffusion"

    # Sampler names accepted by the constructor
    _SUPPORTED_SAMPLERS = ("ddim", "ddpm")

    def __init__(self, *,
                 checkpoint_path: Path,
                 sampler_name: str,
                 n_steps: int = 50,
                 ddim_eta: float = 0.0,
                 force_cpu: bool = False
                 ):
        r"""
        :param checkpoint_path: is the path of the checkpoint
        :param sampler_name: is the name of the [sampler](../sampler/index.html);
            one of `'ddim'` or `'ddpm'`
        :param n_steps: is the number of sampling steps
        :param ddim_eta: is the [DDIM sampling](../sampler/ddim.html) $\eta$ constant
        :param force_cpu: when true, run on the CPU even if CUDA is available
        :raises ValueError: if `sampler_name` is not a supported sampler
        """
        # Fail fast: reject an unknown sampler before the expensive checkpoint load,
        # instead of leaving `self.sampler` undefined until the first sampling call.
        if sampler_name not in self._SUPPORTED_SAMPLERS:
            raise ValueError(
                f"Unsupported sampler_name {sampler_name!r}; "
                f"expected one of {self._SUPPORTED_SAMPLERS}"
            )

        # Use the first CUDA device unless it is unavailable or the CPU is forced
        device_id = "cpu" if force_cpu or not torch.cuda.is_available() else "cuda:0"

        # Load [latent diffusion model](../latent_diffusion.html)
        self.model = load_model(checkpoint_path, device_id)
        # Get device or force CPU if requested
        self.device = torch.device(device_id)

        # Move the model to device
        self.model.to(self.device)

        # Initialize [sampler](../sampler/index.html)
        if sampler_name == 'ddim':
            self.sampler = DDIMSampler(self.model,
                                       n_steps=n_steps,
                                       ddim_eta=ddim_eta)
        else:  # 'ddpm' (validated above)
            self.sampler = DDPMSampler(self.model)

    def _autocast_device_type(self) -> str:
        """Return the device-type string to pass to `torch.autocast` for `self.device`."""
        return "cpu" if self.device == torch.device("cpu") else "cuda"

    def _get_conditioning(self, prompt, batch_size, uncond_scale):
        """
        Return `(cond, un_cond)` for a batch of `batch_size` copies of `prompt`.

        `un_cond` holds the embeddings of empty prompts, or `None` when
        `uncond_scale` is exactly `1.0`. Callers wrap this in `torch.autocast`.
        """
        # If unconditional scaling is not $1$ get the embeddings for empty prompts (no conditioning).
        if uncond_scale != 1.0:
            un_cond = self.model.get_text_conditioning(batch_size * [""])
        else:
            un_cond = None
        # Get the prompt embeddings
        cond = self.model.get_text_conditioning(batch_size * [prompt])
        return cond, un_cond

    @torch.no_grad()
    def __call__(self, *,
                 seed: int = 0,
                 dest_path: str,
                 batch_size: int = 1,
                 prompt: str,
                 h: int = 512, w: int = 512,
                 uncond_scale: float = 7.5,
                 low_vram: bool = False,
                 ):
        r"""
        Generate images for `prompt` and save them to `dest_path`.

        :param seed: the seed to use when generating the images
        :param dest_path: is the path to store the generated images
        :param batch_size: is the number of images to generate in a batch
        :param prompt: is the prompt to generate images with
        :param h: is the height of the image
        :param w: is the width of the image
        :param uncond_scale: is the unconditional guidance scale $s$. This is used for
            $\epsilon_\theta(x_t, c) = s\epsilon_\text{cond}(x_t, c) + (s - 1)\epsilon_\text{cond}(x_t, c_u)$
            (NOTE(review): classifier-free guidance is usually written with a factor of
            $(1 - s)$ on the unconditional term; confirm against the sampler implementation)
        :param low_vram: whether to limit VRAM usage; forces `batch_size` to 1
        """
        # Number of channels in the image
        c = 4
        # Image to latent space resolution reduction
        f = 8

        set_seed(seed)
        # Adjust batch size based on VRAM availability
        if low_vram:
            batch_size = 1

        # AMP auto casting
        with torch.autocast(self._autocast_device_type()):
            cond, un_cond = self._get_conditioning(prompt, batch_size, uncond_scale)
            # [Sample in the latent space](../sampler/index.html).
            # `x` will be of shape `[batch_size, c, h / f, w / f]`
            x = self.sampler.sample(cond=cond,
                                    shape=[batch_size, c, h // f, w // f],
                                    uncond_scale=uncond_scale,
                                    uncond_cond=un_cond)
            # Decode the image from the [autoencoder](../model/autoencoder.html)
            images = self.model.autoencoder_decode(x)

        # Save images
        save_images(images, dest_path)

    # functions for pipeline
    @torch.no_grad()
    def generate_text_embeddings(self,
                                 seed,
                                 prompt,
                                 batch_size=4,
                                 uncond_scale=7.5,
                                 low_vram: bool = False,):
        """
        Pipeline stage 1: seed the generators and embed the prompt.

        :param seed: the seed passed to `set_seed`
        :param prompt: is the prompt to embed
        :param batch_size: is the number of copies of the prompt in the batch
        :param uncond_scale: is the unconditional guidance scale; when it is exactly `1.0`
            no empty-prompt embeddings are computed
        :param low_vram: whether to limit VRAM usage; forces `batch_size` to 1
        :return: tuple `(cond, un_cond)`; `un_cond` is `None` when `uncond_scale == 1.0`
        """
        set_seed(seed)
        # Adjust batch size based on VRAM availability
        if low_vram:
            batch_size = 1

        # AMP auto casting
        with torch.autocast(self._autocast_device_type()):
            return self._get_conditioning(prompt, batch_size, uncond_scale)

    @torch.no_grad()
    def generate_latent_space(self, cond, un_cond, batch_size=4, uncond_scale=7.5, h=512, w=512):
        """
        Pipeline stage 2: sample a latent batch with the configured sampler.

        Unlike `__call__`, the sampling here is not wrapped in `torch.autocast`.

        :param cond: the prompt embeddings
        :param un_cond: the empty-prompt embeddings, or `None`
        :param batch_size: first dimension of the requested latent shape
            (presumably has to match the batch size used for `cond` - verify against the sampler)
        :param uncond_scale: is the unconditional guidance scale
        :param h: is the height of the image
        :param w: is the width of the image
        :return: whatever `self.sampler.sample` returns for shape `[batch_size, 4, h // 8, w // 8]`
        """
        # Number of channels in the image
        c = 4
        # Image to latent space resolution reduction
        f = 8
        # [Sample in the latent space](../sampler/index.html).
        # `x` will be of shape `[batch_size, c, h / f, w / f]`
        x = self.sampler.sample(cond=cond,
                                shape=[batch_size, c, h // f, w // f],
                                uncond_scale=uncond_scale,
                                uncond_cond=un_cond)
        # return the sampled latents
        return x

    @torch.no_grad()
    def generate_image(self, x, dest_path):
        """
        Pipeline stage 3: decode latents to images and save them.

        :param x: the latents to decode
        :param dest_path: is the path to store the generated images
        """
        # Decode the image from the [autoencoder](../model/autoencoder.html)
        images = self.model.autoencoder_decode(x)

        # Save images
        save_images(images, dest_path)

### **Read artist list**

In [None]:
# Path of the artist list (one name per line), the shared prompt prefix and the seed pool
url = "../input/artists.txt"
prompt = "A woman with flowers in her hair in a courtyard, in the style of"
seeds = [0, 1, 2, 3, 5, 8, 13, 21]
# The context manager closes the file even if reading fails. `strip()` removes the
# line ending without cutting the last character off a final line that has no
# trailing newline, and blank lines are skipped instead of becoming empty names.
with open(url, "r", encoding="utf-8") as file:
    artist_list = [line.strip() for line in file if line.strip()]
if ENV_TYPE == "TEST":
    # Keep the test run short: only the first three artists
    artist_list = artist_list[0:3]
print(artist_list[0:3])

['Frank Frazetta', 'Niklas Jansson', 'androidarts']


### **Step 3 Assign integer to each artist**

In [None]:
# Map a zero-padded 4-digit id (the artist's position in `artist_list`) to its record.
# `enumerate` gives every entry its own position in a single pass; looking the
# position up with `list.index` is quadratic and maps repeated names to one id.
artist_dict = {}
for position, artist in enumerate(artist_list):
    index = str(position).zfill(4)
    artist_dict[index] = {'id': index,
                          'name': artist
                         }

### **Step 4 Define the prompt-generation function**

In [None]:
def generate_prompt(artist, prompt):
    """Return the full text prompt: `prompt`, a single space, then `artist`."""
    return "{} {}".format(prompt, artist)

In [None]:
# Attach the full text prompt to every artist record, then show the first record
for artist_id, record in artist_dict.items():
    record['prompt'] = generate_prompt(record['name'], prompt)
print(artist_dict['0000'])

{'id': '0000', 'name': 'Frank Frazetta', 'prompt': 'A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta'}


### **Step 5 set noise vector**

In [None]:
def set_noise_vector(seed, device, height=512, factor=8, width=512):
    """
    Call `set_seed(seed)` and draw one standard-normal noise tensor.

    :param seed: value passed to `set_seed` before sampling
    :param device: accepted but not used; the tensor is always created on the CPU
    :param height: height, integer-divided by `factor` to get the tensor's third dimension
    :param factor: divisor applied to `height` and `width`
    :param width: width, integer-divided by `factor` to get the tensor's last dimension
    :return: CPU tensor of shape `[1, 4, height // factor, width // factor]`
    """
    set_seed(seed)
    latent_shape = [1, 4, height // factor, width // factor]
    return torch.randn(latent_shape, device='cpu')

### **Step 6 Save n noise vectors**

In [None]:
def create_n_vector_name(n_vector):
    """Return the names `n000`, `n001`, ... for `n_vector` noise vectors (zero-padded to 3 digits)."""
    return [f"n{number:03d}" for number in range(n_vector)]

In [None]:
# Number of noise vectors (and therefore images) generated per artist
N_NOISE_VECTORS = 4
if len(seeds) < N_NOISE_VECTORS:
    # Report the problem up front instead of an IndexError in the middle of the loop
    raise ValueError(f"need at least {N_NOISE_VECTORS} seeds, got {len(seeds)}")

# The vector names do not depend on the artist, so they are built once
noise_vector_name = create_n_vector_name(N_NOISE_VECTORS)

# One [prompt, seed, image file name] entry per artist and noise vector
prompt_and_seed = []
for artist_id in artist_dict:
    noise_vectors = {}
    # zip pairs every name with its seed; only the first N_NOISE_VECTORS seeds are used
    for vector_name, vector_seed in zip(noise_vector_name, seeds):
        noise_vectors[vector_name] = set_noise_vector(vector_seed, device)
        image_name = f"a{artist_id}{vector_name}.jpg"
        prompt_and_seed.append([artist_dict[artist_id]['prompt'], vector_seed, image_name])
    artist_dict[artist_id]['noise_vectors'] = noise_vectors
    # NOTE(review): the whole seed pool is recorded although only the first
    # N_NOISE_VECTORS seeds produced vectors - confirm this is intended.
    artist_dict[artist_id]['seeds'] = seeds

In [None]:
# Preview the first three [prompt, seed, image name] entries
print(prompt_and_seed[:3])

[['A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta', 0, 'a0000n000.jpg'], ['A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta', 1, 'a0000n001.jpg'], ['A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta', 2, 'a0000n002.jpg']]


### save artist_dict as json

In [None]:
# Build a JSON-serialisable copy of `artist_dict` with the tensors converted to nested lists.
# A new record is created for every artist, so `artist_dict` itself keeps its tensors;
# a plain `new_dict = artist_dict` would only alias it and overwrite them in place.
new_dict = {}
for artist, record in artist_dict.items():
    serialisable_record = dict(record)
    serialisable_record['noise_vectors'] = {
        vector_name: vector.cpu().numpy().tolist() if isinstance(vector, torch.Tensor) else vector
        for vector_name, vector in record['noise_vectors'].items()
    }
    new_dict[artist] = serialisable_record
with open("artist_noise_vectors.json", "w") as f:
    json.dump(new_dict, f)

### **Step 7 Check that the same noise vector and prompt generate the same image**

In [None]:
# Sampler used for generation and its number of sampling steps
sampler_name, steps = "ddim", 50
# Generated images are written below this directory
output_dir = './output/'
# Checkpoint file inside the model directory chosen during setup
CHECKPOINT = os.path.join(model_path, './sd-v1-4.ckpt')

In [None]:
# Build the text-to-image pipeline from the checkpoint path, sampler name and step count
txt2img_settings = dict(checkpoint_path=CHECKPOINT, sampler_name=sampler_name, n_steps=steps)
text_to_image = Txt2Img(**txt2img_settings)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

Some weights of the model checkpoint at openai/clip-vit-large-patch14 were not used when initializing CLIPTextModel: ['vision_model.encoder.layers.20.mlp.fc1.bias', 'vision_model.encoder.layers.8.self_attn.q_proj.bias', 'vision_model.encoder.layers.15.mlp.fc2.bias', 'vision_model.encoder.layers.21.layer_norm1.weight', 'text_projection.weight', 'vision_model.encoder.layers.4.self_attn.out_proj.bias', 'vision_model.encoder.layers.12.mlp.fc1.bias', 'vision_model.encoder.layers.22.mlp.fc2.bias', 'vision_model.encoder.layers.12.mlp.fc2.bias', 'vision_model.encoder.layers.19.mlp.fc1.bias', 'vision_model.encoder.layers.22.mlp.fc1.bias', 'vision_model.encoder.layers.9.mlp.fc2.weight', 'vision_model.encoder.layers.17.self_attn.q_proj.weight', 'vision_model.encoder.layers.22.layer_norm2.bias', 'vision_model.encoder.layers.14.layer_norm1.weight', 'vision_model.encoder.layers.18.self_attn.out_proj.bias', 'vision_model.encoder.layers.17.mlp.fc1.bias', 'vision_model.encoder.layers.22.self_attn.k_pro

In [None]:

print(f"number of images being generate: {len(prompt_and_seed)}")
# Make sure the destination directory exists before the first image is written
os.makedirs(output_dir, exist_ok=True)
start_point = time.time()
# `enumerate` supplies the running image number directly (no list search per image).
# The loop uses its own names so the notebook-level `prompt` prefix is not overwritten,
# which would corrupt the prompts if the earlier prompt cell were re-run.
# NOTE(review): the noise vectors stored in `artist_dict` are not passed to the sampler
# here, and both pipeline calls use their default batch_size of 4 - confirm whether the
# images are meant to be generated from the stored vectors.
for image_number, (image_prompt, image_seed, image_name) in enumerate(prompt_and_seed):
    start_time = time.time()
    image_path = os.path.join(output_dir, image_name)
    cond, un_cond = text_to_image.generate_text_embeddings(image_seed, image_prompt)
    latent_space = text_to_image.generate_latent_space(cond, un_cond)
    text_to_image.generate_image(latent_space, image_path)
    print(f"time to generate image number {image_number} as {image_path} is: {time.time()- start_time}")
print(f"total run time is: {time.time() - start_point}")

number of images being generate: 12


time to generate image number 0 as ./output/a0000n000.jpg is: 126.20038080215454
time to generate image number 1 as ./output/a0000n001.jpg is: 124.9128692150116
