In [None]:
import time
from pathlib import Path
import shutil

import cv2
import diffusers
import matplotlib.pyplot as plt
import numpy as np
import torch
from diffusers import LMSDiscreteScheduler, StableDiffusionPipeline
from PIL import Image
from skimage.exposure import match_histograms
from torch import autocast
from torchvision import transforms as tfms
from tqdm.auto import tqdm

from img2img import StableDiffusionImg2ImgPipeline

In [2]:
# Directory the Stable Diffusion pipeline is loaded from.
STABLE_DIFFUSION_MODEL_PATH = Path.home() / "Desktop" / "stable-diffusion-v1-4"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Build the text-to-image pipeline from the local weights in half precision.
txt_to_img = StableDiffusionPipeline.from_pretrained(
    str(STABLE_DIFFUSION_MODEL_PATH),
    revision="fp16",
    torch_dtype=torch.float16,
)
# Swap the safety checker for a pass-through (images returned untouched,
# nothing flagged) to avoid false positives. This is done before moving the
# pipeline to the device, same as the statement order it replaces.
txt_to_img.safety_checker = lambda images, **kwargs: (images, False)
# txt_to_img.enable_attention_slicing()  # use less vram
txt_to_img = txt_to_img.to(device)

# Scheduler used by the img2img pipeline.
lms_scheduler = LMSDiscreteScheduler(
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
)
# The img2img pipeline is assembled from the model objects already held by
# txt_to_img, so no second copy of the weights is loaded (saves vram).
im2im = StableDiffusionImg2ImgPipeline(
    scheduler=lms_scheduler,
    unet=txt_to_img.unet,
    vae=txt_to_img.vae,
    tokenizer=txt_to_img.tokenizer,
    text_encoder=txt_to_img.text_encoder,
)
# im2im.enable_attention_slicing()
im2im.progress_bar = lambda iterable: iterable  # turn off progress bar
im2im.to(device)

{'trained_betas'} was not found in config. Values will be initialized to default values.
ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.


StableDiffusionImg2ImgPipeline {
  "_class_name": "StableDiffusionImg2ImgPipeline",
  "_diffusers_version": "0.3.0",
  "scheduler": [
    "diffusers",
    "LMSDiscreteScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

In [4]:
# Helpers


def timestamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    stamp_format = "%Y%m%d-%H%M%S"
    return time.strftime(stamp_format, time.localtime())


def maintain_colors(prev_img, color_match_sample, mode):
    """Histogram-match the colours of ``prev_img`` to ``color_match_sample``.

    Args:
        prev_img: Image array to recolour. The OpenCV conversion codes used
            below treat it as RGB with channels on the last axis.
        color_match_sample: Reference image array, same channel layout as
            ``prev_img``.
        mode: ``"Match Frame 0 RGB"`` matches directly in RGB;
            ``"Match Frame 0 HSV"`` matches in HSV; any other value
            (nominally ``"Match Frame 0 LAB"``) matches in LAB.

    Returns:
        The recoloured image array, converted back to RGB when the matching
        was done in HSV or LAB.
    """
    # source: https://colab.research.google.com/github/deforum/stable-diffusion/blob/main/Deforum_Stable_Diffusion.ipynb#scrollTo=2g-f7cQmf2Nt
    # `channel_axis=-1` is the replacement for the deprecated
    # `multichannel=True` keyword of `match_histograms`, which emitted a
    # warning on every call.
    if mode == "Match Frame 0 RGB":
        return match_histograms(prev_img, color_match_sample, channel_axis=-1)

    if mode == "Match Frame 0 HSV":
        to_space, from_space = cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB
    else:  # Match Frame 0 LAB (also the fallback for any other mode)
        to_space, from_space = cv2.COLOR_RGB2LAB, cv2.COLOR_LAB2RGB

    # Match in the chosen colour space, then convert the result back to RGB.
    prev_converted = cv2.cvtColor(prev_img, to_space)
    sample_converted = cv2.cvtColor(color_match_sample, to_space)
    matched = match_histograms(prev_converted, sample_converted, channel_axis=-1)
    return cv2.cvtColor(matched, from_space)

In [90]:
# Run configuration: every generated file goes into a fresh timestamped folder.
OUTPUT_DIR = Path(f"images/{timestamp()}")
PROMPT_A = "A photo of carrots"
PROMPT_B = "A photo of hands"
GUIDANCE_SCALE = 7.5
IMG2IMG_STRENGTH = 0.65
# Number of img2img frames produced per run of a generation cell.
NUM_IMG2IMG_STEPS = 20
SEED = 0
WIDTH = 512
HEIGHT = 512
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
# Use same random seed for everything.
# The generator is created on `device` (the device the pipelines were moved to)
# rather than on a hard-coded "cuda", so the two always agree.
generator = torch.Generator(device).manual_seed(SEED)
# To know where things were
# `__vsc_ipynb_file__` is only defined when the notebook runs inside VS Code.
notebook_path = Path(__vsc_ipynb_file__)  # vscode only
print(f"{notebook_path.resolve() = }")
print(f"{OUTPUT_DIR.resolve() = }")

notebook_path.resolve() = PosixPath('/home/sid/Documents/stable-diffusion-im2im/main.ipynb')
OUTPUT_DIR.resolve() = PosixPath('/home/sid/Documents/stable-diffusion-im2im/images/20221002-154356')


In [100]:
# Snapshot this notebook into OUTPUT_DIR so the run can be reproduced later.
# The copy is named after the notebook plus the time the snapshot was taken.
notebook_copy_path = OUTPUT_DIR / f"{notebook_path.stem}_{timestamp()}.ipynb"
shutil.copy(notebook_path, notebook_copy_path)

PosixPath('images/20221002-154356/main_20221002-155443.ipynb')

In [92]:
# Text-to-image pass: render PROMPT_A once to obtain frame 0.
with autocast("cuda"):
    with torch.no_grad():
        txt_to_img_output = txt_to_img(
            [PROMPT_A],
            width=WIDTH,
            height=HEIGHT,
            generator=generator,
        )
init_image = txt_to_img_output["sample"][0]
# Frame 0 is saved under index 0000, using the same naming scheme as later frames.
first_frame_path = OUTPUT_DIR / f"{PROMPT_A}_{PROMPT_B}_{0:04d}.jpg"
init_image.save(first_frame_path)
# State consumed by the img2img cells: index of the last frame and the frame itself.
i = 0
image = init_image

100%|██████████| 51/51 [00:09<00:00,  5.36it/s]


In [93]:
i_start = i + 1  # means can rerun cell to continue generation
# Frame 0 does not change inside the loop, so convert it to an array once
# here instead of on every iteration.
color_reference = np.array(init_image)
# Generate the next NUM_IMG2IMG_STEPS frames: each frame is the previous one
# passed through img2img, conditioned on PROMPT_B.
for i in tqdm(range(i_start, i_start + NUM_IMG2IMG_STEPS)):
    # Try to prevent colours from going red
    image = Image.fromarray(
        maintain_colors(np.array(image), color_reference, "Match Frame 0 RGB")
    )
    # Fresh generator per frame, seeded with the frame index and created on
    # the same device the pipelines were moved to.
    generator = torch.Generator(device).manual_seed(i)
    with autocast("cuda"), torch.no_grad():
        image = im2im(
            PROMPT_B,
            image,
            strength=IMG2IMG_STRENGTH,
            guidance_scale=GUIDANCE_SCALE,
            generator=generator,
        )["sample"][0]
    image.save(OUTPUT_DIR / f"{PROMPT_A}_{PROMPT_B}_{i:04d}.jpg")
print(f"{i = }")

  return match_histograms(prev_img, color_match_sample, multichannel=True)
100%|██████████| 20/20 [02:07<00:00,  6.40s/it]

i = 20





In [95]:
i_start = i + 1  # means can rerun cell to continue generation
# Frame 0 does not change inside the loop, so convert it to an array once
# here instead of on every iteration.
color_reference = np.array(init_image)
# Generate the next NUM_IMG2IMG_STEPS frames: each frame is the previous one
# passed through img2img, this time conditioned on PROMPT_A.
for i in tqdm(range(i_start, i_start + NUM_IMG2IMG_STEPS)):
    # Try to prevent colours from going red
    image = Image.fromarray(
        maintain_colors(np.array(image), color_reference, "Match Frame 0 RGB")
    )
    # Fresh generator per frame, seeded with the frame index and created on
    # the same device the pipelines were moved to.
    generator = torch.Generator(device).manual_seed(i)
    with autocast("cuda"), torch.no_grad():
        image = im2im(
            PROMPT_A,
            image,
            strength=IMG2IMG_STRENGTH,
            guidance_scale=GUIDANCE_SCALE,
            generator=generator,
        )["sample"][0]
    image.save(OUTPUT_DIR / f"{PROMPT_A}_{PROMPT_B}_{i:04d}.jpg")
print(f"{i = }")

  return match_histograms(prev_img, color_match_sample, multichannel=True)
100%|██████████| 20/20 [02:09<00:00,  6.46s/it]

i = 40





In [102]:
# turn the images into a movie
# ffmpeg options used below:
#   -y                   overwrite the output file if it already exists
#   -framerate 8         read the input frames at 8 frames per second
#   -pattern_type glob   treat the input path as a glob (every .jpg in OUTPUT_DIR)
#   -c:v libx264         encode the video with the libx264 encoder
#   -pix_fmt yuv420p     use the yuv420p pixel format for the output
#   -vf trim=0:30        keep only the first 30 seconds
# NOTE(review): frame order presumably follows the sorted glob expansion, which
# the zero-padded indices in the file names keep chronological - confirm.
!ffmpeg -y -framerate 8 -pattern_type glob -i "{OUTPUT_DIR}/*.jpg"  \
    -c:v libx264 -pix_fmt yuv420p -vf trim=0:30 "{OUTPUT_DIR}/movie.mp4"

# Play the movie with vlc (if it's installed)
# NOTE(review): this presumably blocks the cell until the VLC window is closed - confirm.
!vlc "{OUTPUT_DIR}/movie.mp4"

ffmpeg version 4.4.2 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 10.4.0 (conda-forge gcc 10.4.0-16)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-avresample --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libo

In [106]:
# make the movie into a loop, by concatenating the reverse of it
# Filter graph: the video stream of movie.mp4 is reversed into [r], then the
# original stream and [r] are concatenated (n=2 segments, one video stream)
# into [v], which is the only stream mapped to the output.
# -y overwrites movie_loop.mp4 if it already exists.
!ffmpeg -i "{OUTPUT_DIR}/movie.mp4"  \
    -filter_complex "[0:v]reverse,fifo[r];[0:v][r] concat=n=2:v=1 [v]" -map "[v]"  \
    -y  \
    "{OUTPUT_DIR}/movie_loop.mp4"

# Play the looped movie with vlc (if it's installed)
!vlc "{OUTPUT_DIR}/movie_loop.mp4"

ffmpeg version 4.4.2 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 10.4.0 (conda-forge gcc 10.4.0-16)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1664281150702/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-avresample --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libo