In [2]:
from pathlib import Path

import cv2
import numpy as np
from diffusers.utils import load_image, make_image_grid
from PIL import Image

# Build the Canny edge map that is later passed to the pipeline as `image=`.

# Hysteresis thresholds handed to cv2.Canny.
low_threshold = 100
high_threshold = 200

# Source picture; kept around so it can be shown next to the results.
original_image = load_image("./lantern.png")

# cv2.Canny yields a 2-D edge map; stack it three times along a new last
# axis so the array has three identical channels before the PIL conversion.
edges = cv2.Canny(np.array(original_image), low_threshold, high_threshold)
image = np.dstack([edges, edges, edges])
canny_image = Image.fromarray(image)

In [3]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch

# Canny-conditioned ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16, use_safetensors=True
)

# Stable Diffusion pipeline with the ControlNet attached, also in fp16.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16, use_safetensors=True
)

# Swap in UniPC while reusing the configuration of the default scheduler.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Deliberately no `pipe.to("cuda")` here: model CPU offload handles device
# placement itself, moving each sub-model to the GPU only while it runs.
# Moving the whole pipeline to CUDA first is redundant and defeats the
# purpose of offloading.
pipe.enable_model_cpu_offload()

Loading pipeline components...: 100%|██████████| 7/7 [00:10<00:00,  1.55s/it]


In [4]:
# Generate ten variations of the bog prompt, all guided by the same Canny map.
# Each image is written to disk as soon as it is produced, so an interrupted
# run still keeps the finished files.
outputs = []
for i in range(10):
    # A CPU generator seeded with the loop index makes every image
    # reproducible across kernel restarts while still giving ten different
    # samples.
    generator = torch.Generator(device="cpu").manual_seed(i)
    output = pipe(
        "Person with lantern standing in a bog",
        image=canny_image,
        generator=generator,
    ).images[0]
    output.save(f"./bog{i}.jpg")
    outputs.append(output)

# A bare expression inside a `for` body is never rendered by Jupyter, so the
# comparison grid is built once here, as the cell's last expression.
# Two rows when the tile count is even, otherwise a single strip, so that
# rows * cols always equals the number of tiles.
tiles = [original_image, canny_image, *outputs]
grid_rows = 2 if len(tiles) % 2 == 0 else 1
make_image_grid(tiles, rows=grid_rows, cols=len(tiles) // grid_rows)

  hidden_states = F.scaled_dot_product_attention(
100%|██████████| 50/50 [02:07<00:00,  2.55s/it]
100%|██████████| 50/50 [04:18<00:00,  5.16s/it]
100%|██████████| 50/50 [04:18<00:00,  5.17s/it]
100%|██████████| 50/50 [04:17<00:00,  5.15s/it]
100%|██████████| 50/50 [04:17<00:00,  5.15s/it]
100%|██████████| 50/50 [04:17<00:00,  5.16s/it]
100%|██████████| 50/50 [04:17<00:00,  5.15s/it]
100%|██████████| 50/50 [04:18<00:00,  5.17s/it]
100%|██████████| 50/50 [04:17<00:00,  5.14s/it]
100%|██████████| 50/50 [1:04:57<00:00, 77.95s/it]    


In [4]:
# Generate ten variations of the beam-bridge prompt, all guided by the same
# Canny map. Each image is written to disk as soon as it is produced, so an
# interrupted run still keeps the finished files.

# Create the output directory up front; otherwise a missing folder would only
# surface as an error in `save`, after a full sampling run has been spent.
out_dir = Path("./semantic_masks/generated_images")
out_dir.mkdir(parents=True, exist_ok=True)

outputs = []
for i in range(10):
    # A CPU generator seeded with the loop index makes every image
    # reproducible across kernel restarts while still giving ten different
    # samples.
    generator = torch.Generator(device="cpu").manual_seed(i)
    output = pipe(
        "Photo of a Concrete beam bridge with cracks over water",
        image=canny_image,
        generator=generator,
    ).images[0]
    output.save(out_dir / f"beam_bridge{i}.jpg")
    outputs.append(output)

# A bare expression inside a `for` body is never rendered by Jupyter, so the
# comparison grid is built once here, as the cell's last expression.
# Two rows when the tile count is even, otherwise a single strip, so that
# rows * cols always equals the number of tiles.
tiles = [original_image, canny_image, *outputs]
grid_rows = 2 if len(tiles) % 2 == 0 else 1
make_image_grid(tiles, rows=grid_rows, cols=len(tiles) // grid_rows)

100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]


In [5]:
# Generate ten variations of the slab-bridge prompt, all guided by the same
# Canny map. Each image is written to disk as soon as it is produced, so an
# interrupted run still keeps the finished files.

# Create the output directory up front; otherwise a missing folder would only
# surface as an error in `save`, after a full sampling run has been spent.
out_dir = Path("./semantic_masks/generated_images")
out_dir.mkdir(parents=True, exist_ok=True)

outputs = []
for i in range(10):
    # A CPU generator seeded with the loop index makes every image
    # reproducible across kernel restarts while still giving ten different
    # samples.
    generator = torch.Generator(device="cpu").manual_seed(i)
    output = pipe(
        "Photo of a concrete slab bridge with cracks over water",
        image=canny_image,
        generator=generator,
    ).images[0]
    output.save(out_dir / f"slab_bridge{i}.jpg")
    outputs.append(output)

# A bare expression inside a `for` body is never rendered by Jupyter, so the
# comparison grid is built once here, as the cell's last expression.
# Two rows when the tile count is even, otherwise a single strip, so that
# rows * cols always equals the number of tiles.
tiles = [original_image, canny_image, *outputs]
grid_rows = 2 if len(tiles) % 2 == 0 else 1
make_image_grid(tiles, rows=grid_rows, cols=len(tiles) // grid_rows)

100%|██████████| 50/50 [02:21<00:00,  2.83s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [02:21<00:00,  2.82s/it]
100%|██████████| 50/50 [03:41<00:00,  4.42s/it]
100%|██████████| 50/50 [03:40<00:00,  4.41s/it]
100%|██████████| 50/50 [03:40<00:00,  4.42s/it]
100%|██████████| 50/50 [03:43<00:00,  4.48s/it]
100%|██████████| 50/50 [05:32<00:00,  6.64s/it]


In [7]:
# Generate ten variations of the suspension-bridge prompt, all guided by the
# same Canny map. Each image is written to disk as soon as it is produced, so
# an interrupted run still keeps the finished files.

# Create the output directory up front; otherwise a missing folder would only
# surface as an error in `save`, after a full sampling run has been spent.
out_dir = Path("./semantic_masks/generated_images")
out_dir.mkdir(parents=True, exist_ok=True)

outputs = []
for i in range(10):
    # A CPU generator seeded with the loop index makes every image
    # reproducible across kernel restarts while still giving ten different
    # samples.
    generator = torch.Generator(device="cpu").manual_seed(i)
    output = pipe(
        "Photo of a concrete suspension bridge with cracks over water",
        image=canny_image,
        generator=generator,
    ).images[0]
    output.save(out_dir / f"suspension_bridge{i}.jpg")
    outputs.append(output)

# A bare expression inside a `for` body is never rendered by Jupyter, so the
# comparison grid is built once here, as the cell's last expression.
# Two rows when the tile count is even, otherwise a single strip, so that
# rows * cols always equals the number of tiles.
tiles = [original_image, canny_image, *outputs]
grid_rows = 2 if len(tiles) % 2 == 0 else 1
make_image_grid(tiles, rows=grid_rows, cols=len(tiles) // grid_rows)

100%|██████████| 50/50 [06:58<00:00,  8.36s/it]
 40%|████      | 20/50 [02:19<03:28,  6.96s/it]


KeyboardInterrupt: 