In [24]:
import time
from pathlib import Path

import diffusers
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
import torch
import matplotlib.pyplot as plt
from PIL import Image
from torch import autocast
from torchvision import transforms as tfms
from tqdm.auto import tqdm

In [5]:
# Directory holding the Stable Diffusion v1.4 weights, resolved under the user's home.
STABLE_DIFFUSION_MODEL_PATH = Path.home().joinpath("Desktop", "stable-diffusion-v1-4")


# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Load the text-to-image pipeline from the local model directory.
# Half precision is only requested when running on the GPU: `device` may fall
# back to "cpu", where float16 inference is poorly supported, so full precision
# is used there instead.
pipe = StableDiffusionPipeline.from_pretrained(
    str(STABLE_DIFFUSION_MODEL_PATH),
    revision="fp16",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
# Replace the safety checker with a pass-through that returns the images
# unchanged and reports nothing flagged.
pipe.safety_checker = lambda images, **kwargs: (images, False)  # turn off to avoid false positives
pipe = pipe.to(device)

ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.
{'trained_betas'} was not found in config. Values will be initialized to default values.


In [6]:
# Pull the sub-models out of the text-to-image pipeline so that the
# image-to-image pipeline built later can share the very same objects.
vae, unet = pipe.vae, pipe.unet
text_encoder, tokenizer = pipe.text_encoder, pipe.tokenizer

# A separate LMS scheduler, configured explicitly rather than taken from `pipe`.
scheduler = LMSDiscreteScheduler(
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
)

In [21]:
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
from transformers import CLIPFeatureExtractor

from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

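# Call pattern that the dummy feature extractor and safety checker below have to
# satisfy (taken from the img2img pipeline, where `self` is the pipeline):
# safety_cheker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(self.device)
# image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_cheker_input.pixel_values)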

class FeatureExtractorDummy(CLIPFeatureExtractor):
    """No-op stand-in for the CLIP feature extractor used by the safety check.

    The class itself (not an instance) is handed to the pipeline as
    ``feature_extractor``. When the pipeline "calls the feature extractor" it
    therefore just constructs an instance, and every argument is ignored.
    The pipeline then chains ``.to(device)`` on that result and reads
    ``.pixel_values`` from it, so ``to`` must hand back an object that still
    has a ``pixel_values`` attribute.
    """

    # Read by the pipeline after `.to(...)`; there is no real CLIP input to pass on.
    pixel_values = None

    def __init__(self, *args, **kwargs):
        # Deliberately skips the parent initialiser and ignores all arguments.
        pass

    def to(self, *args, **kwargs):
        """Ignore the requested device/dtype and return this same dummy object.

        Returning ``None`` here would make the subsequent ``.pixel_values``
        access raise ``AttributeError``.
        """
        return self

def safety_checker_dummy(images, **kwargs):
    """Pass-through replacement for the safety checker.

    Args:
        images: The generated images; returned untouched.
        **kwargs: Accepted and ignored (for example ``clip_input``).

    Returns:
        A ``(images, False)`` tuple: the same ``images`` object and ``False``
        in the slot where the flagged-content result goes.
    """
    return images, False

In [22]:
def register_modules(self, **kwargs):
    """Attach every keyword argument to ``self`` as an attribute of the same name.

    Nothing is validated or recorded beyond the plain attribute assignment,
    and the function returns ``None``.
    """
    for attr_name in kwargs:
        setattr(self, attr_name, kwargs[attr_name])

# Monkeypatch at class level: every StableDiffusionImg2ImgPipeline created after
# this line uses the plain setattr-based `register_modules` defined above.
# NOTE(review): presumably needed so that the dummy safety checker / feature
# extractor are accepted as pipeline components; confirm against the installed
# diffusers version.
StableDiffusionImg2ImgPipeline.register_modules = register_modules

In [23]:
# Build the image-to-image pipeline from the components extracted from `pipe`,
# with the dummy safety checker and feature extractor swapped in.
# The feature extractor is passed as a class on purpose (not an instance).
im2im = StableDiffusionImg2ImgPipeline(
    unet=unet,
    vae=vae,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    scheduler=scheduler,
    feature_extractor=FeatureExtractorDummy,
    safety_checker=safety_checker_dummy,
)
# Identity wrapper in place of the progress bar: iterate without any display.
im2im.progress_bar = lambda iterable: iterable  # turn off progress bar

In [28]:
# Helpers

def timestamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    return time.strftime("%Y%m%d-%H%M%S")

from skimage.exposure import match_histograms
import cv2
import numpy as np

def maintain_colors(prev_img, color_match_sample, mode):
    """Match the colour histogram of ``prev_img`` to that of ``color_match_sample``.

    Both inputs are treated as RGB image arrays with the channels on the last
    axis.  # NOTE(review): callers pass np.array(PIL image); dtype presumably uint8 - confirm.

    Args:
        prev_img: Image whose colours are adjusted.
        color_match_sample: Reference image supplying the target histograms.
        mode: ``'Match Frame 0 RGB'`` matches directly in RGB;
            ``'Match Frame 0 HSV'`` matches in HSV and converts back to RGB;
            any other value (including ``'Match Frame 0 LAB'``) matches in LAB
            and converts back to RGB.

    Returns:
        The colour-matched image in RGB.
    """
    # source: https://colab.research.google.com/github/deforum/stable-diffusion/blob/main/Deforum_Stable_Diffusion.ipynb#scrollTo=2g-f7cQmf2Nt
    # `channel_axis=-1` is the current spelling of the deprecated
    # `multichannel=True`: histograms are matched per channel along the last axis.
    if mode == 'Match Frame 0 RGB':
        return match_histograms(prev_img, color_match_sample, channel_axis=-1)

    # HSV and LAB share one code path; only the colour-space conversion codes differ.
    if mode == 'Match Frame 0 HSV':
        to_space, to_rgb = cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB
    else:  # Match Frame 0 LAB (also the fallback for unrecognised modes)
        to_space, to_rgb = cv2.COLOR_RGB2LAB, cv2.COLOR_LAB2RGB

    matched = match_histograms(
        cv2.cvtColor(prev_img, to_space),
        cv2.cvtColor(color_match_sample, to_space),
        channel_axis=-1,
    )
    return cv2.cvtColor(matched, to_rgb)


In [29]:
from pathlib import Path

# Prompt for the text-to-image call that produces the starting frame.
initial_prompt = "A photo of a bowl of fruit"
# Prompt applied on every image-to-image step of the generation loop.
prompt = "A photo of an acrobat"
# Upper bound of the generation loop; frames are written with indices 1..N.
N = 1000

# One output directory per run, named after the time the cell was executed.
movie_dir = Path(f"images/{timestamp()}")
movie_dir.mkdir(exist_ok=True, parents=True)

# Seeded generator on the same device the pipelines run on (previously
# hard-coded to "cuda", which contradicted the CPU fallback in `device`).
generator = torch.Generator(device).manual_seed(0)

In [30]:
# Generate the initial image
# Text-to-image call under CUDA autocast with gradient tracking disabled;
# the first returned sample is kept as the starting image.
with autocast("cuda"):
    with torch.no_grad():
        init_image = pipe([initial_prompt], generator=generator)["sample"][0]

  0%|          | 0/51 [00:02<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 512.00 MiB (GPU 0; 7.79 GiB total capacity; 2.07 GiB already allocated; 275.06 MiB free; 2.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Loop from scratch

image = init_image
# The colour reference never changes, so convert it to an array only once.
color_reference = np.array(init_image)

# Generate the rest of the images.
# Iteration i first writes the current frame under index i, then derives the
# next frame from it, so the initial image is frame 1.
for i in tqdm(range(1, N)):
    image.save(movie_dir / f"{initial_prompt}_{prompt}_{i:04d}.jpg")
    # Stop colours from going red
    matched_frame = maintain_colors(np.array(image), color_reference, 'Match Frame 0 RGB')
    image = Image.fromarray(matched_frame)
    # Fresh, per-frame seed on the same device the pipeline runs on.
    generator = torch.Generator(device).manual_seed(i)
    with autocast("cuda"), torch.no_grad():
        image = im2im(prompt, image, strength=0.5, guidance_scale=7.5, generator=generator)["sample"][0]

# Write the last frame. After a full loop this index equals i + 1 == N; using N
# directly (floored at 1) also works when the loop body never ran (N <= 1),
# where the loop variable `i` would be undefined.
image.save(movie_dir / f"{initial_prompt}_{prompt}_{max(N, 1):04d}.jpg")