---
# **Start Here**
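This notebook assumes the model checkpoint is named "model.ckpt" and is located in the directory that contains the `stable-diffusion` folder (the code cells load it from `../model.ckpt`).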

---


---
# **Text 2 Image**
---



## 1. Setup

In [None]:
cd stable-diffusion

In [None]:
import argparse, os, sys, glob
import torch
import numpy as np
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange
from torchvision.utils import make_grid
import time
from pytorch_lightning import seed_everything
from torch import autocast
from contextlib import contextmanager, nullcontext
from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler

In [None]:
def chunk(it, size):
    """Return an iterator over consecutive tuples of up to ``size`` items.

    The input is consumed lazily; the final tuple may be shorter, and
    iteration stops as soon as an empty tuple would be produced.
    """
    source = iter(it)

    def _batches():
        while True:
            batch = tuple(islice(source, size))
            if batch == ():
                return
            yield batch

    return _batches()


def load_model_from_config(ckpt, verbose=False):
    """Read a checkpoint file and return the mapping stored under "state_dict".

    Args:
        ckpt: Path of the checkpoint file, passed straight to ``torch.load``.
        verbose: Unused; kept so existing calls keep working.

    Returns:
        The object stored under the checkpoint's ``"state_dict"`` key.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry.
    """
    print(f"Loading model from {ckpt}")
    # Deserialize onto the CPU: the returned mapping is kept around by the
    # notebook, so loading it onto the GPU would hold that memory for the
    # whole session (and fail outright on a machine without CUDA).
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    return pl_sd["state_dict"]

In [None]:
def generate(opt,prompt,grid):
    """Sample images for ``prompt`` and return them as PIL images.

    Relies on the module-level ``modelCS`` (prompt conditioning), ``model``
    (sampling) and ``modelFS`` (latent decoding) created by the loader cell.

    Args:
        opt: Settings object; reads n_samples, n_rows, n_iter, C, H, W, f,
            precision, scale, ddim_steps and ddim_eta.
        prompt: Text prompt; must not be None.
        grid: When truthy, a contact sheet of every sample is prepended to
            the returned list.

    Returns:
        List of ``PIL.Image``; the grid (if requested) comes first, followed
        by the individual samples in generation order.
    """
    device = 'cuda'
    images = []
    grid_tiles = []

    batch_size = opt.n_samples
    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size

    assert prompt is not None
    data = [batch_size * [prompt]]
    shape = [opt.C, opt.H // opt.f, opt.W // opt.f]

    precision_scope = autocast if opt.precision=="autocast" else nullcontext
    with torch.no_grad():
        for n in trange(opt.n_iter, desc="Sampling"):
            # Draw fresh start noise for every pass; reusing one tensor would
            # make each pass start from the same latent.
            start_code = torch.randn([batch_size, *shape], device=device)
            for prompts in tqdm(data, desc="data"):
                with precision_scope("cuda"):
                    # The empty-prompt conditioning is only needed when
                    # guidance is actually applied (scale != 1).
                    uc = None
                    if opt.scale != 1.0:
                        uc = modelCS.get_learned_conditioning(batch_size * [""])
                    if isinstance(prompts, tuple):
                        prompts = list(prompts)

                    c = modelCS.get_learned_conditioning(prompts)

                    samples_ddim = model.sample(S=opt.ddim_steps,
                                                conditioning=c,
                                                batch_size=opt.n_samples,
                                                shape=shape,
                                                verbose=False,
                                                unconditional_guidance_scale=opt.scale,
                                                unconditional_conditioning=uc,
                                                eta=opt.ddim_eta,
                                                x_T=start_code)

                    # Decode one latent at a time rather than the whole batch.
                    for i in range(batch_size):
                        decoded = modelFS.decode_first_stage(samples_ddim[i].unsqueeze(0))
                        # Map the decoder output from [-1, 1] to [0, 1].
                        x_sample = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
                        if grid:
                            # Keep the clamped tensor of *every* sample so the
                            # grid shows all of them with valid pixel values.
                            grid_tiles.append(x_sample)

                        pixels = 255. * rearrange(x_sample[0].cpu().numpy(), 'c h w -> h w c')
                        images.append(Image.fromarray(pixels.astype(np.uint8)))

                    del samples_ddim
                    print("memory_final = ", torch.cuda.memory_allocated()/1e6)

    # Assemble the grid once, after all passes have finished. A separate name
    # is used so the boolean ``grid`` flag is never overwritten.
    if grid:
        sheet = torch.stack(grid_tiles, 0)
        sheet = rearrange(sheet, 'n b c h w -> (n b) c h w')
        sheet = make_grid(sheet, nrow=n_rows)
        sheet = 255. * rearrange(sheet, 'c h w -> h w c').cpu().numpy()
        images = [Image.fromarray(sheet.astype(np.uint8))] + images

    print('Finished!')
    return images

## 2. Load Model

In [None]:
# Checkpoint location, relative to the stable-diffusion folder
# (this points to the model that is in your root gdrive folder).
ckpt = '../model.ckpt'

sd = load_model_from_config(ckpt)

# Sort every "model.*" key into one of two groups: keys that mention
# input_blocks, middle_block or time_embed go to `li`, the rest to `lo`.
li, lo = [], []
for key in sd:
    parts = key.split('.')
    if parts[0] != 'model':
        continue
    in_first_group = not {'input_blocks', 'middle_block', 'time_embed'}.isdisjoint(parts)
    (li if in_first_group else lo).append(key)

# Re-register the tensors under the "model1." / "model2." prefixes;
# key[6:] drops the leading "model." from the old name.
for prefix, keys in (('model1.', li), ('model2.', lo)):
    for key in keys:
        sd[prefix + key[6:]] = sd.pop(key)

## 3. Settings

In [None]:
class config:
    """Default settings for the Text 2 Image cells.

    A plain attribute holder. The "Run prompt" cell overwrites scale, H, W,
    n_samples, n_rows and ddim_steps before calling generate().
    """

    def __init__(self):
        # Model definition YAML (don't change this) and checkpoint path.
        # To use another checkpoint, edit `ckpt` in the "Load Model" cell.
        self.config = 'optimizedSD/v1-inference.yaml'
        self.ckpt = ckpt

        # 'autocast' makes generate() run under torch.autocast and makes the
        # loader cell call .half() on model and modelCS; any other value
        # skips both (expect higher memory use).
        self.precision = 'autocast'
        # Forwarded as `eta` to model.sample(); keep as is.
        self.ddim_eta = 0.0
        # First dimension of the per-sample latent shape; keep as is.
        self.C = 4

        # Passed to seed_everything() right after this object is created.
        self.seed = 435455

        # Sampling steps; keep within 30 ~ 250, higher is better but slower.
        self.ddim_steps = 30
        # Output height / width in pixels.
        self.H = 256
        self.W = 256
        # Downscale factor: the latent is (H // f) x (W // f).
        self.f = 8
        # Guidance scale; roughly 4 ~ 25. A value of exactly 1.0 skips the
        # empty-prompt conditioning in generate().
        self.scale = 7.5

        # Loop count for the outer sampling loop; recommended to keep as is
        # because it multiplies the waiting time.
        self.n_iter = 1
        # Batch size, i.e. number of images produced per pass.
        self.n_samples = 9
        # Images per grid row (values <= 0 fall back to n_samples).
        self.n_rows = 3

# Build the settings object, then hand its seed to pytorch_lightning's
# seed_everything so later random draws start from a fixed seed.
opt = config()
seed_everything(opt.seed)

In [None]:
# Parse the YAML named in the settings.
# NOTE: from here on `config` is the parsed YAML and no longer the settings
# class, so re-running the Settings cell requires re-running its class
# definition first (it is in the same cell).
config = OmegaConf.load(opt.config)
config.modelUNet.params.ddim_steps = opt.ddim_steps

# All three sub-models read their weights from the same state dict;
# strict=False lets each one ignore the keys that belong to the others.

# Sampling model: this cell puts it in eval mode but does not move it to a
# device.
model = instantiate_from_config(config.modelUNet)
model.load_state_dict(sd, strict=False)
model.eval()

# Prompt conditioning model, placed on the GPU.
modelCS = instantiate_from_config(config.modelCondStage)
modelCS.load_state_dict(sd, strict=False)
modelCS.eval()
modelCS.to('cuda')

# Latent decoder, placed on the GPU.
modelFS = instantiate_from_config(config.modelFirstStage)
modelFS.load_state_dict(sd, strict=False)
modelFS.eval()
modelFS.to('cuda')

# With autocast, model and modelCS are converted to half precision
# (modelFS is left untouched).
if opt.precision == "autocast":
    model.half()
    modelCS.half()

## 4. Run prompt

In [None]:
# User inputs. The "#@param" suffixes are Colab form annotations; leave them
# exactly as written.
prompt = "Anatomical cross section of a tree with a boney skeletal structure 1970s scientific diagram" #@param {type:"string"}
scale = 7.5 #@param {type:"number"}
height = 512 #@param {type:"integer"}
width = 512 #@param {type:"integer"}
n_steps = 30 #@param {type:"slider", min:30, max:250, step:5}
n_images = 4 #@param {type:"integer"}
n_rows = 2 #@param {type:"integer"}
grid = "yes" #@param ["yes", "no"]
# Copy the inputs onto the settings object read by generate(); these replace
# the defaults assigned in the Settings cell.
opt.scale = scale
opt.H = height
opt.W = width
opt.n_samples = n_images
opt.n_rows = n_rows
opt.ddim_steps = n_steps
# generate() takes a boolean grid flag, so the "yes"/"no" choice is converted.
images = generate(opt=opt, prompt=prompt, grid=(grid=="yes"))

## Output

If you generated more than 5 images (the grid counts too), just add more code cells below, continuing the sequence (`images[5]`, `images[6]`, ...).

If you chose grid, it's the first one

Right click and save to download

In [None]:
images[0]  # the grid when grid == "yes", otherwise the first sample

In [None]:
images[1]  # first sample when the grid is included, otherwise the second

In [None]:
images[2]  # next entry of the list returned by generate()

In [None]:
images[3]  # next entry of the list returned by generate()

In [None]:
images[4]  # exists only if generate() returned 5+ entries (e.g. 4 samples plus the grid)

---
# **Image 2 Image**
---




## 1. Setup

In [None]:
#cd /content/gdrive/MyDrive/stable-diffusion

In [None]:
import argparse, os, sys, glob
import PIL
import time
import torch
import numpy as np

from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
from torchvision.utils import make_grid
from torch import autocast
from contextlib import nullcontext
from pytorch_lightning import seed_everything

from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler

In [None]:
def chunk(it, size):
    """Split ``it`` into successive tuples holding at most ``size`` items.

    Items are pulled lazily from the input; the last tuple may be shorter
    and the iterator ends when no items are left.
    """
    source = iter(it)

    def _batches():
        while True:
            batch = tuple(islice(source, size))
            if batch == ():
                return
            yield batch

    return _batches()

def load_model_from_config(config, ckpt, verbose=False):
    """Instantiate ``config.model``, load checkpoint weights and move it to the GPU.

    Args:
        config: Parsed configuration; ``config.model`` is handed to
            ``instantiate_from_config``.
        ckpt: Path of the checkpoint file, passed to ``torch.load``.
        verbose: When true, print the missing / unexpected keys reported by
            ``load_state_dict``.

    Returns:
        The model, on the GPU and in eval mode.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry.
    """
    print(f"Loading model from {ckpt}")
    # Deserialize onto the CPU: the model is moved to the GPU below, so
    # loading the checkpoint there as well would hold a second copy of the
    # weights in GPU memory while the state dict is alive.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    # strict=False: mismatching keys are reported instead of raising.
    missing, unexpected = model.load_state_dict(sd, strict=False)
    if verbose and len(missing) > 0:
        print("missing keys:")
        print(missing)
    if verbose and len(unexpected) > 0:
        print("unexpected keys:")
        print(unexpected)

    model.cuda()
    model.eval()
    return model

def load_img(path):
    """Load an image file as a half-precision tensor scaled to [-1, 1].

    The image is converted to RGB, each side is rounded down to a multiple
    of 32 and the picture is resized with Lanczos resampling.

    Args:
        path: Image file to open.

    Returns:
        ``torch.HalfTensor`` of shape (1, 3, h, w) with values in [-1, 1].

    Raises:
        ValueError: If either side of the image is smaller than 32 pixels.
    """
    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(path) as source:
        image = source.convert("RGB")
    w, h = image.size
    print(f"loaded input image of size ({w}, {h}) from {path}")
    w, h = (side - side % 32 for side in (w, h))  # round down to a multiple of 32
    if w == 0 or h == 0:
        raise ValueError(
            f"image {path} is too small: both sides must be at least 32 pixels"
        )
    image = image.resize((w, h), resample=PIL.Image.LANCZOS)
    image = np.array(image).astype(np.float32) / 255.0
    # HWC -> 1xCxHxW
    image = image[None].transpose(0, 3, 1, 2)
    image = torch.from_numpy(image).half()
    return 2.*image - 1.

def generate(opt,init_img,grid,prompt):
    """Produce variations of ``init_img`` guided by ``prompt``.

    Relies on the module-level ``model`` and ``sampler`` created by the
    loader cell. The output size follows the input image; opt.H, opt.W,
    opt.f and opt.C are not read here.

    Args:
        opt: Settings object; reads n_samples, n_rows, n_iter, ddim_steps,
            ddim_eta, strength, precision and scale.
        init_img: Path of the starting image; must be an existing file.
        grid: When truthy, a contact sheet of every sample is prepended to
            the returned list.
        prompt: Text prompt; must not be None.

    Returns:
        List of ``PIL.Image``; the grid (if requested) comes first, followed
        by the individual samples in generation order.
    """
    device = 'cuda'
    images = []
    grid_tiles = []

    batch_size = opt.n_samples
    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size

    assert prompt is not None
    data = [batch_size * [prompt]]

    assert os.path.isfile(init_img)
    init_image = load_img(init_img).to(device)
    init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
    init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space

    sampler.make_schedule(ddim_num_steps=opt.ddim_steps, ddim_eta=opt.ddim_eta, verbose=False)

    assert 0. <= opt.strength <= 1., 'can only work with strength in [0.0, 1.0]'
    # Number of steps the init latent is noised by, then denoised again.
    # NOTE(review): strength == 1.0 gives t_enc == ddim_steps; confirm the
    # sampler accepts that index for every step count.
    t_enc = int(opt.strength * opt.ddim_steps)
    print(f"target t_enc is {t_enc} steps")

    precision_scope = autocast if opt.precision == "autocast" else nullcontext
    with torch.no_grad(), precision_scope("cuda"), model.ema_scope():
        for n in trange(opt.n_iter, desc="Sampling"):
            for prompts in tqdm(data, desc="data"):
                # The empty-prompt conditioning is only needed when guidance
                # is actually applied (scale != 1).
                uc = None
                if opt.scale != 1.0:
                    uc = model.get_learned_conditioning(batch_size * [""])
                if isinstance(prompts, tuple):
                    prompts = list(prompts)
                c = model.get_learned_conditioning(prompts)

                z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(device))
                samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=opt.scale,
                                         unconditional_conditioning=uc,)

                # Decode each latent exactly once and reuse the result for
                # both the individual image and the grid; decoding the whole
                # batch a second time only for the grid doubles the work.
                for i in range(batch_size):
                    decoded = model.decode_first_stage(samples[i].unsqueeze(0))
                    # Map the decoder output from [-1, 1] to [0, 1].
                    x_sample = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
                    if grid:
                        grid_tiles.append(x_sample)

                    pixels = 255. * rearrange(x_sample[0].cpu().numpy(), 'c h w -> h w c')
                    images.append(Image.fromarray(pixels.astype(np.uint8)))
                del samples

    # A separate name is used so the boolean ``grid`` flag is never
    # overwritten by the grid tensor.
    if grid:
        sheet = torch.stack(grid_tiles, 0)
        sheet = rearrange(sheet, 'n b c h w -> (n b) c h w')
        sheet = make_grid(sheet, nrow=n_rows)
        sheet = 255. * rearrange(sheet, 'c h w -> h w c').cpu().numpy()
        images = [Image.fromarray(sheet.astype(np.uint8))] + images

    print('Finished!')
    return images

## 2. Settings and loading model

In [None]:
class config:
    """Default settings for the Image 2 Image cells.

    A plain attribute holder. The "Run prompt" cell overwrites scale, H, W,
    n_samples, n_rows, ddim_steps and strength before calling generate().
    """

    def __init__(self):
        # Model definition YAML; don't change this.
        self.config = 'configs/stable-diffusion/v1-inference.yaml'
        # This points to the model that is in your root gdrive folder.
        self.ckpt = '../model.ckpt'

        # 'autocast' makes generate() run under torch.autocast; any other
        # value runs without it (expect higher memory use).
        self.precision = 'autocast'
        # Forwarded as `ddim_eta` to sampler.make_schedule(); keep as is.
        self.ddim_eta = 0.0
        # Keep as is.
        self.C = 4

        # Passed to seed_everything() right after this object is created.
        self.seed = 7777

        # Sampling steps; keep within 30 ~ 250, higher is better but slower.
        self.ddim_steps = 30
        # Height / width; the vertical and horizontal resolution.
        self.H = 512
        self.W = 512
        # Visual scale maybe; 256x256 with f = 4 seems to use the same RAM as
        # 512x512 with f = 8.
        self.f = 8
        # Guidance scale; roughly 4 ~ 25, changes how the prompt is
        # interpreted.
        self.scale = 7.5
        # How aggressively the input image is changed; keep between
        # 0.2 ~ 1.0.
        self.strength = 0.7

        # Loop count for the outer sampling loop; recommended to keep as is
        # because it multiplies the waiting time.
        self.n_iter = 1
        # Batch size, i.e. number of images produced per pass.
        self.n_samples = 9
        # Images per grid row (values <= 0 fall back to n_samples).
        self.n_rows = 3

# Build the settings object, then hand its seed to pytorch_lightning's
# seed_everything so later random draws start from a fixed seed.
opt = config()
seed_everything(opt.seed)

In [None]:
# Parse the YAML named in the settings (this rebinds `config` from the
# settings class to the parsed YAML), build the model from the checkpoint,
# convert it to half precision and wrap it in a DDIM sampler.
config = OmegaConf.load(opt.config)
model = load_model_from_config(config, opt.ckpt)
model = model.half()
sampler = DDIMSampler(model)

## 3. Run prompt

Add "../" before the file name for images stored in the root gdrive folder (for example `../img.png`).

This video (not mine) shows how to upload images: https://siasky.net/_ABvKVbl9c9FDFywm4HauIr3g2EgPg2YbRE8burq8IsK-w

`strength` sets how aggressively the input image is modified: values near 0 stay close to the original, values near 1 change it the most.

In [None]:
# User inputs. The "#@param" suffixes are Colab form annotations; leave them
# exactly as written.
prompt = "beautiful landscape mountain river shot wide-angle national park nature preserve" #@param {type:"string"}
img = "../img.png" #@param {type:"string"}
scale = 5 #@param {type:"number"}
height = 512 #@param {type:"integer"}
width = 512 #@param {type:"integer"}
n_steps = 100 #@param {type:"slider", min:30, max:250, step:5}
strength = 0.51 #@param {type:"slider", min:0, max:1, step:0.01}
n_images = 4 #@param {type:"integer"}
n_rows = 2 #@param {type:"integer"}
grid = "yes" #@param ["yes", "no"]
# Copy the inputs onto the settings object read by generate(); these replace
# the defaults assigned in the Settings cell.
opt.scale = scale
# NOTE: H and W are stored here, but this section's generate() does not read
# them; the output size follows the input image.
opt.H = height
opt.W = width
opt.n_samples = n_images
opt.n_rows = n_rows
opt.ddim_steps = n_steps
opt.strength = strength
# generate() takes a boolean grid flag, so the "yes"/"no" choice is converted.
images = generate(opt, init_img=img, grid=(grid=="yes"), prompt=prompt)

## Output

In [None]:
images[0]  # the grid when grid == "yes", otherwise the first sample

In [None]:
images[1]  # first sample when the grid is included, otherwise the second

In [None]:
images[2]  # next entry of the list returned by generate()

In [None]:
images[3]  # next entry of the list returned by generate()

In [None]:
images[4]  # exists only if generate() returned 5+ entries (e.g. 4 samples plus the grid)

---
# **Text 2 Image with k-diffusion**
---


## 1. Setup

In [None]:
#cd /content/gdrive/MyDrive/stable-diffusion

In [None]:
import argparse, os, sys, glob
import time
import torch
import numpy as np

from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange
from torchvision.utils import make_grid
from pytorch_lightning import seed_everything
from torch import autocast
from contextlib import contextmanager, nullcontext

import accelerate
import k_diffusion as K
import torch.nn as nn

from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler

In [None]:
def chunk(it, size):
    """Yield-style batching: tuples of at most ``size`` items taken from ``it``.

    The input is read lazily; the last tuple may be shorter and the iterator
    finishes once the input is exhausted.
    """
    source = iter(it)

    def _batches():
        while True:
            batch = tuple(islice(source, size))
            if batch == ():
                return
            yield batch

    return _batches()


def load_model_from_config(ckpt, verbose=False):
    """Read a checkpoint file and return the mapping stored under "state_dict".

    Args:
        ckpt: Path of the checkpoint file, passed straight to ``torch.load``.
        verbose: Unused; kept so existing calls keep working.

    Returns:
        The object stored under the checkpoint's ``"state_dict"`` key.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry.
    """
    print(f"Loading model from {ckpt}")
    # Deserialize onto the CPU: the returned mapping is kept around by the
    # notebook, so loading it onto the GPU would hold that memory for the
    # whole session (and fail outright on a machine without CUDA).
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    return pl_sd["state_dict"]

class CFGDenoiser(nn.Module):
    """Classifier-free-guidance wrapper around a denoiser.

    The wrapped model is called as ``model(x, sigma, cond=...)``.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale):
        """Return the guided prediction for ``x`` at noise level ``sigma``.

        Args:
            x: Batch of noisy inputs.
            sigma: Noise level per batch element.
            uncond: Conditioning for the unguided branch, or None to skip
                guidance entirely.
            cond: Conditioning for the prompt.
            cond_scale: Guidance weight; 1 reproduces the conditional output.

        Returns:
            ``uncond_out + (cond_out - uncond_out) * cond_scale``, or the
            plain conditional output when ``uncond`` is None.
        """
        # Without an unconditional branch there is nothing to blend (and
        # torch.cat would fail on None), so run one conditional pass. This is
        # what the guidance formula yields for cond_scale == 1.
        if uncond is None:
            return self.inner_model(x, sigma, cond=cond)

        # Evaluate both branches in a single doubled batch.
        x_in = torch.cat([x] * 2)
        sigma_in = torch.cat([sigma] * 2)
        cond_in = torch.cat([uncond, cond])
        uncond_out, cond_out = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
        return uncond_out + (cond_out - uncond_out) * cond_scale

def generate(opt,prompt,grid):
    """Sample images for ``prompt`` with the k-diffusion LMS sampler.

    Relies on the module-level ``modelCS`` (prompt conditioning),
    ``model_wrap`` (denoiser) and ``modelFS`` (latent decoding) created by
    the loader cell.

    Args:
        opt: Settings object; reads n_samples, n_rows, n_iter, C, H, W, f,
            precision, scale, ddim_steps and seed.
        prompt: Text prompt; must not be None.
        grid: When truthy, a contact sheet of every sample is prepended to
            the returned list.

    Returns:
        List of ``PIL.Image``; the grid (if requested) comes first, followed
        by the individual samples in generation order.
    """
    accelerator = accelerate.Accelerator()
    device = accelerator.device
    quiet = not accelerator.is_main_process
    images = []
    grid_tiles = []

    # Per-process seeding kept from the original code. Note that the noise
    # draw below re-seeds from opt.seed, which takes precedence.
    seeds = torch.randint(-2 ** 63, 2 ** 63 - 1, [accelerator.num_processes])
    torch.manual_seed(seeds[accelerator.process_index].item())

    batch_size = opt.n_samples
    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size

    assert prompt is not None
    data = [batch_size * [prompt]]
    shape = [opt.C, opt.H // opt.f, opt.W // opt.f]

    precision_scope = autocast if opt.precision=="autocast" else nullcontext
    with torch.no_grad(), precision_scope("cuda"):
        # The noise schedule does not change between passes.
        sigmas = model_wrap.get_sigmas(opt.ddim_steps)

        for n in trange(opt.n_iter, desc="Sampling", disable=quiet):
            for prompts in tqdm(data, desc="data", disable=quiet):
                uc = None
                if opt.scale != 1.0:
                    uc = modelCS.get_learned_conditioning(batch_size * [""])
                if isinstance(prompts, tuple):
                    prompts = list(prompts)

                c = modelCS.get_learned_conditioning(prompts)

                # Offset the seed by the pass index: pass 0 uses opt.seed
                # itself, later passes get different start noise instead of
                # reproducing the first pass.
                torch.manual_seed(opt.seed + n)
                x = torch.randn([batch_size, *shape], device=device) * sigmas[0] # for GPU draw

                if uc is None:
                    # scale == 1.0: no guidance, so call the denoiser directly
                    # (CFGDenoiser would try to concatenate a None tensor).
                    denoiser = model_wrap
                    extra_args = {'cond': c}
                else:
                    denoiser = CFGDenoiser(model_wrap)
                    extra_args = {'cond': c, 'uncond': uc, 'cond_scale': opt.scale}
                samples_ddim = K.sampling.sample_lms(denoiser, x, sigmas, extra_args=extra_args, disable=quiet)

                # Decode one latent at a time rather than the whole batch.
                for i in range(batch_size):
                    decoded = modelFS.decode_first_stage(samples_ddim[i].unsqueeze(0))
                    # Map the decoder output from [-1, 1] to [0, 1].
                    decoded = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
                    x_sample = accelerator.gather(decoded)
                    if grid:
                        grid_tiles.append(x_sample)
                    pixels = 255. * rearrange(x_sample[0].cpu().numpy(), 'c h w -> h w c')
                    images.append(Image.fromarray(pixels.astype(np.uint8)))

                del samples_ddim

        # A separate name is used so the boolean ``grid`` flag is never
        # overwritten by the grid tensor.
        if grid:
            sheet = torch.stack(grid_tiles, 0)
            sheet = rearrange(sheet, 'n b c h w -> (n b) c h w')
            sheet = make_grid(sheet, nrow=n_rows)
            sheet = 255. * rearrange(sheet, 'c h w -> h w c').cpu().numpy()
            images = [Image.fromarray(sheet.astype(np.uint8))] + images

    print('Finished!')
    return images

## 2. Load Model

In [None]:
# Checkpoint location, relative to the stable-diffusion folder
# (this points to the model that is in your root gdrive folder).
ckpt = '../model.ckpt'

sd = load_model_from_config(ckpt)

# Sort every "model.*" key into one of two groups: keys that mention
# input_blocks, middle_block or time_embed go to `li`, the rest to `lo`.
li, lo = [], []
for key in sd:
    parts = key.split('.')
    if parts[0] != 'model':
        continue
    in_first_group = not {'input_blocks', 'middle_block', 'time_embed'}.isdisjoint(parts)
    (li if in_first_group else lo).append(key)

# Re-register the tensors under the "model1." / "model2." prefixes;
# key[6:] drops the leading "model." from the old name.
for prefix, keys in (('model1.', li), ('model2.', lo)):
    for key in keys:
        sd[prefix + key[6:]] = sd.pop(key)

## 3. Settings

In [None]:
class config:
    """Default settings for the k-diffusion Text 2 Image cells.

    A plain attribute holder. The "Run Prompt" cell overwrites scale, H, W,
    n_samples, n_rows and ddim_steps before calling generate().
    """

    def __init__(self):
        # Model definition YAML (don't change this) and checkpoint path.
        # To use another checkpoint, edit `ckpt` in the "Load Model" cell.
        self.config = 'optimizedSD/v1-inference.yaml'
        self.ckpt = ckpt

        # 'autocast' makes generate() run under torch.autocast and makes the
        # loader cell call .half() on model and modelCS; any other value
        # skips both (expect higher memory use).
        self.precision = 'autocast'
        # Not read by this section's generate(); keep as is.
        self.ddim_eta = 0.0
        # First dimension of the per-sample latent shape; keep as is.
        self.C = 4

        # Passed to seed_everything() and used by generate() to seed the
        # start noise.
        self.seed = 435455

        # Sampling steps; keep within 30 ~ 250, higher is better but slower.
        self.ddim_steps = 30
        # Output height / width in pixels.
        self.H = 256
        self.W = 256
        # Downscale factor: the latent is (H // f) x (W // f).
        self.f = 8
        # Guidance scale; roughly 4 ~ 25, changes how the prompt is
        # interpreted.
        self.scale = 7.5

        # Loop count for the outer sampling loop; recommended to keep as is
        # because it multiplies the waiting time.
        self.n_iter = 1
        # Batch size, i.e. number of images produced per pass.
        self.n_samples = 4
        # Images per grid row (values <= 0 fall back to n_samples).
        self.n_rows = 2

# Build the settings object, then hand its seed to pytorch_lightning's
# seed_everything so later random draws start from a fixed seed.
opt = config()
seed_everything(opt.seed)

In [None]:
# Parse the YAML named in the settings (this rebinds `config` from the
# settings class to the parsed YAML).
config = OmegaConf.load(opt.config)
config.modelUNet.params.ddim_steps = opt.ddim_steps

# All three sub-models read their weights from the same state dict;
# strict=False lets each one ignore the keys that belong to the others.

# Sampling model: this cell puts it in eval mode but does not move it to a
# device.
model = instantiate_from_config(config.modelUNet)
model.load_state_dict(sd, strict=False)
model.eval()

# Prompt conditioning model, placed on the GPU.
modelCS = instantiate_from_config(config.modelCondStage)
modelCS.load_state_dict(sd, strict=False)
modelCS.eval()
modelCS.to('cuda')

# Latent decoder, placed on the GPU.
modelFS = instantiate_from_config(config.modelFirstStage)
modelFS.load_state_dict(sd, strict=False)
modelFS.eval()
modelFS.to('cuda')

# With autocast, model and modelCS are converted to half precision
# (modelFS is left untouched).
if opt.precision == "autocast":
    model.half()
    modelCS.half()

# Wrap the sampling model for k-diffusion and record the first and last
# entries of its sigma table.
model_wrap = K.external.CompVisDenoiser(model)
sigma_min = model_wrap.sigmas[0].item()
sigma_max = model_wrap.sigmas[-1].item()

## 4. Run Prompt

In [None]:
# User inputs. The "#@param" suffixes are Colab form annotations; leave them
# exactly as written.
prompt = "Anatomical cross section of a tree with a boney skeletal structure 1970s scientific diagram" #@param {type:"string"}
scale = 7.5 #@param {type:"number"}
height = 512 #@param {type:"integer"}
width = 512 #@param {type:"integer"}
n_steps = 30 #@param {type:"slider", min:30, max:250, step:5}
n_images = 4 #@param {type:"integer"}
n_rows = 2 #@param {type:"integer"}
grid = "yes" #@param ["yes", "no"]
# Copy the inputs onto the settings object read by generate(); these replace
# the defaults assigned in the Settings cell.
opt.scale = scale
opt.H = height
opt.W = width
opt.n_samples = n_images
opt.n_rows = n_rows
opt.ddim_steps = n_steps
# generate() takes a boolean grid flag, so the "yes"/"no" choice is converted.
images = generate(opt=opt, prompt=prompt, grid=(grid=="yes"))

## Output

If you generated more than 5 images (the grid counts too), just add more code cells below, continuing the sequence (`images[5]`, `images[6]`, ...).

If you chose grid, it's the first one

Right click and save to download

In [None]:
images[0]  # the grid when grid == "yes", otherwise the first sample

In [None]:
images[1]  # first sample when the grid is included, otherwise the second

In [None]:
images[2]  # next entry of the list returned by generate()

In [None]:
images[3]  # next entry of the list returned by generate()

In [None]:
images[4]  # exists only if generate() returned 5+ entries (e.g. 4 samples plus the grid)