<a href="https://colab.research.google.com/github/ritwikraha/computer-needs-glasses/blob/master/image-generation/understanding_tasks_in_diffusers_part_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Understanding Tasks in Diffusers Part 2

This tutorial is completely inspired by the [beautiful documentation](https://huggingface.co/docs/diffusers/en/using-diffusers/inpaint?regular-specific=runwayml%2Fstable-diffusion-v1-5#inpainting) at HuggingFace Diffusers.

### Setup and Imports

In [None]:
!pip install -qq diffusers accelerate

In [None]:
import cv2
import matplotlib as plt
import numpy as np
import torch
from diffusers import (
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    AutoPipelineForText2Image,
    ControlNetModel,
    StableDiffusionControlNetInpaintPipeline,
)
from diffusers.utils import load_image, make_image_grid

### Loading the image and masks

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Random generator created on that device and seeded with a fixed value (31).
generator = torch.Generator(device)
generator.manual_seed(31)

In [None]:
# Download the source picture first, then the mask that goes with it.
init_image, mask_image = (
    load_image(url)
    for url in (
        "https://i.imgur.com/UjAIycp.png",
        "https://i.imgur.com/JMHtoZE.png",
    )
)

In [None]:
# Checkpoint for the first inpainting example.
model_id = "kandinsky-community/kandinsky-2-2-decoder-inpaint"

# Load the weights as float16, then place the pipeline on `device`.
pipeline = AutoPipelineForInpainting.from_pretrained(model_id, torch_dtype=torch.float16)
pipeline = pipeline.to(device)

In [None]:
# Positive prompt: comma-separated description of what should be painted.
prompt = ", ".join(
    ("a peepal tree", "illustrated", "cute", "pixar", "disney", "8k")
)
# Negative prompt: comma-separated traits passed as `negative_prompt`.
negative_prompt = ", ".join(
    ("bad anatomy", "deformed", "ugly", "disfigured")
)

In [None]:
# Run the loaded inpainting pipeline on the image/mask pair with the seeded
# generator and keep the first returned image.
inpaint_output = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    mask_image=mask_image,
    generator=generator,
)
image = inpaint_output.images[0]

# One-row grid: source, mask, result (last expression, so it is the cell output).
make_image_grid([init_image, mask_image, image], rows=1, cols=3)

####  Blurring the Mask Area

In [None]:
# Blur the mask that was loaded as `mask_image` (there is no variable named
# `mask` in this notebook) with a blur factor of 33.
# NOTE(review): assumes the currently loaded pipeline exposes `mask_processor`
# -- confirm for the Kandinsky inpainting pipeline.
blurred_mask = pipeline.mask_processor.blur(mask_image, blur_factor=33)
blurred_mask

In [None]:
# Same call as the previous run, but with the blurred mask as `mask_image`.
blurred_output = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    mask_image=blurred_mask,
    generator=generator,
)
image = blurred_output.images[0]

# One-row grid: source, original mask, result.
make_image_grid([init_image, mask_image, image], rows=1, cols=3)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

### SDXL for inpainting

In [None]:
# Load the SDXL inpainting checkpoint in float16 and move it to `device`, like
# the other pipelines in this notebook; the seeded `generator` passed to the
# next call was created on that same device.
pipeline = AutoPipelineForInpainting.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Prompts for the SDXL inpainting run.
prompt = "concept art of a small haunted cottage in the middle of a wheatfield, stephen king, horror, sinister, 8k"
negative_prompt = "bad anatomy, deformed, ugly, disfigured"


In [None]:
# Inpaint with the currently loaded pipeline, reusing the seeded generator,
# and keep the first returned image.
sdxl_output = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    mask_image=mask_image,
    generator=generator,
)
image = sdxl_output.images[0]

# One-row grid: source, mask, result.
make_image_grid([init_image, mask_image, image], rows=1, cols=3)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

### Preserving Unmasked Area

In [None]:
# Stable Diffusion inpainting checkpoint, loaded in float16 and placed on `device`.
model_id = "runwayml/stable-diffusion-inpainting"
pipeline = AutoPipelineForInpainting.from_pretrained(model_id, torch_dtype=torch.float16)
pipeline = pipeline.to(device)

In [None]:
prompt = "concept art of a small haunted cottage in the middle of a wheatfield, stephen king, horror, sinister, 8k"

# Inpaint with default settings and keep the first returned image.
inpaint_output = pipeline(
    prompt=prompt,
    image=init_image,
    mask_image=mask_image,
)
inpainted_image = inpaint_output.images[0]

In [None]:
# Overlay the inpainted result onto the original image using the mask.
unmasked_unchanged_image = pipeline.image_processor.apply_overlay(
    mask_image,
    init_image,
    inpainted_image,
)

# 2x2 grid: source, mask, raw inpainting result, overlaid result.
# The raw result is bound to `inpainted_image`; no `repainted_image` exists.
make_image_grid(
    [init_image, mask_image, inpainted_image, unmasked_unchanged_image],
    rows=2,
    cols=2,
)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

### Pipeline Parameters

- Strength
- Guidance Scale
- Negative Prompt
- Padding Mask Crop

In [None]:
# Load the fp16 variant of the inpainting checkpoint and move it to `device`,
# matching how the other pipelines in this notebook are placed before use.
model_id = "runwayml/stable-diffusion-inpainting"
pipeline = AutoPipelineForInpainting.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
).to(device)

In [None]:
# Positive prompt: comma-separated description of what should be painted.
prompt = ", ".join(
    ("a huge easter bunny", "cute", "adorable", "pixar", "disney", "8k")
)
# Negative prompt: comma-separated traits passed as `negative_prompt`.
negative_prompt = ", ".join(
    ("bad anatomy", "deformed", "ugly", "disfigured")
)

In [None]:
# Inpaint while setting the parameters listed above explicitly:
# strength, guidance scale, negative prompt and padding mask crop.
tuned_output = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    mask_image=mask_image,
    strength=0.6,
    guidance_scale=2.5,
    padding_mask_crop=32,
)
image = tuned_output.images[0]

# One-row grid: source, mask, result.
make_image_grid([init_image, mask_image, image], rows=1, cols=3)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

### Chained Inpainting

- Text2Image -> Inpainting

In [None]:
# Text-to-image needs a regular Stable Diffusion checkpoint.
# NOTE(review): the inpainting checkpoint used elsewhere in this notebook has a
# UNet that expects extra mask / masked-image input channels, so it is not
# suitable for plain text-to-image generation -- confirm the checkpoint choice.
model_id = "runwayml/stable-diffusion-v1-5"
pipeline = AutoPipelineForText2Image.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
).to(device)  # place on `device` like the other pipelines in this notebook

text2image = pipeline("illustration of the hogwarts express going on a bridge, highly detailed, 8k").images[0]
# `resize` returns a new image, so rebind the name to keep the 512x512 version.
text2image = text2image.resize((512, 512))
text2image

In [None]:
# Fetch an arbitrary mask image (expected to be 512x512) for the chained
# inpainting step.
arbitrary_mask = load_image(
    "https://i.imgur.com/X4yzBR7.png"
)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

In [None]:
# Load the Kandinsky inpainting checkpoint in float16 and move it to `device`,
# as done for the same checkpoint earlier in the notebook.
model_id = "kandinsky-community/kandinsky-2-2-decoder-inpaint"
pipeline = AutoPipelineForInpainting.from_pretrained(
    model_id, torch_dtype=torch.float16
).to(device)

# Inpaint the generated text-to-image picture inside the arbitrary mask.
prompt = "concept art of magical spells, swirls, abstract, illustrated, highly detailed, 8K "
image = pipeline(
    prompt=prompt,
    image=text2image,
    mask_image=arbitrary_mask,
).images[0]

# Show the mask that was actually passed to the pipeline (`arbitrary_mask`).
make_image_grid([text2image, arbitrary_mask, image], rows=1, cols=3)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

#### Inpainting to Image2Image

In [None]:
# Load the fp16 variant of the inpainting checkpoint and move it to `device`,
# matching how the other pipelines in this notebook are placed before use.
model_id = "runwayml/stable-diffusion-inpainting"
pipeline = AutoPipelineForInpainting.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
).to(device)

In [None]:
# Positive prompt: comma-separated description of what should be painted.
prompt = ", ".join(
    (
        "concept art digital painting of an elven castle",
        "inspired by lord of the rings",
        "highly detailed",
        "8k",
    )
)
# No negative prompt for this run.
negative_prompt = str()

In [None]:
# First stage: inpaint the source image and take the first returned image.
first_stage_output = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    mask_image=mask_image,
)

# resize image to 1024x1024 for SDXL
image_inpainting = first_stage_output.images[0].resize((1024, 1024))

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

In [None]:
# Load the SDXL refiner as an inpainting pipeline in float16 and move it to
# `device`, matching how the other pipelines in this notebook are placed.
model_id = "stabilityai/stable-diffusion-xl-refiner-1.0"
pipeline = AutoPipelineForInpainting.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
).to(device)

In [None]:
# Second stage: inpaint the resized first-stage result and request the output
# as latents (`output_type="latent"`) rather than a decoded image.
latent_output = pipeline(
    prompt=prompt,
    image=image_inpainting,
    mask_image=mask_image,
    output_type="latent",
)
image = latent_output.images[0]

In [None]:
# Build an image-to-image pipeline from the currently loaded pipeline.
pipeline = AutoPipelineForImage2Image.from_pipe(pipeline)

# Feed the previous stage's output back in and keep the first returned image.
img2img_output = pipeline(prompt=prompt, image=image)
image = img2img_output.images[0]

# 2x2 grid: source, mask, first-stage inpainting, final image.
make_image_grid(
    [init_image, mask_image, image_inpainting, image],
    rows=2,
    cols=2,
)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

### Controlling Image Generation


- ControlNet


In [None]:
# Checkpoints: the ControlNet inpainting model and the base inpainting model.
controlnet_model_id = "lllyasviel/control_v11p_sd15_inpaint"
sd_model_id = "runwayml/stable-diffusion-inpainting"

# Load the ControlNet weights (fp16 variant) and place them on `device`.
controlnet = ControlNetModel.from_pretrained(
    controlnet_model_id, torch_dtype=torch.float16, variant="fp16"
)
controlnet = controlnet.to(device)

# Build the inpainting pipeline around that ControlNet, then place it on `device`.
pipeline = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    sd_model_id, controlnet=controlnet, torch_dtype=torch.float16, variant="fp16"
)
pipeline = pipeline.to(device)


In [None]:
# prepare control image
def make_inpaint_condition(init_image, mask_image):
    """Build the inpainting condition tensor from an image and its mask.

    Args:
        init_image: Image object exposing ``convert("RGB")`` (e.g. a PIL image).
        mask_image: Image object exposing ``convert("L")``; pixels brighter
            than 50% mark the region to repaint.

    Returns:
        A float32 ``torch.Tensor`` laid out as ``(1, channels, height, width)``
        with values scaled to ``[0, 1]`` and every masked pixel set to ``-1.0``
        in all channels.

    Raises:
        AssertionError: If the image and the mask differ in height or width.
    """
    init_image = np.array(init_image.convert("RGB")).astype(np.float32) / 255.0
    mask_image = np.array(mask_image.convert("L")).astype(np.float32) / 255.0

    # Compare both spatial dimensions (height and width); a slice of [0:1]
    # would only compare the heights.
    assert init_image.shape[:2] == mask_image.shape[:2], "image and image_mask must have the same image size"
    init_image[mask_image > 0.5] = -1.0  # set as masked pixel
    # Add a batch axis and reorder HWC -> CHW.
    init_image = np.expand_dims(init_image, 0).transpose(0, 3, 1, 2)
    init_image = torch.from_numpy(init_image)
    return init_image

In [None]:
# Build the ControlNet condition from the source image and its mask, then show
# the resulting tensor as the cell output.
control_image = make_inpaint_condition(
    init_image=init_image,
    mask_image=mask_image,
)
control_image

In [None]:
prompt = "concept art of a small haunted cottage in the middle of a wheatfield, stephen king, horror, sinister, 8k"
image = pipeline(
    prompt=prompt,
    image=init_image,
    mask_image=mask_image,
    control_image=control_image,
).images[0]

# Turn the condition tensor into a viewable picture without needing the
# (never imported) `PIL` name: clamp the -1 "masked" marker to 0 so masked
# pixels render black, reorder to channels-last, and convert through the
# pipeline's image processor, which scales [0, 1] floats to 8-bit.
# NOTE(review): assumes `control_image` is the batch-first, channels-second
# float tensor built by make_inpaint_condition -- confirm.
control_preview = pipeline.image_processor.numpy_to_pil(
    control_image.clamp(0, 1).permute(0, 2, 3, 1).numpy()
)[0]

# 2x2 grid: source, mask, control-image preview, result.
make_image_grid([init_image, mask_image, control_preview, image], rows=2, cols=2)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()

In [None]:
# Load the SDXL refiner as an image-to-image pipeline in float16 and move it
# to `device`, matching how the other pipelines in this notebook are placed.
model_id = "stabilityai/stable-diffusion-xl-refiner-1.0"
pipeline = AutoPipelineForImage2Image.from_pretrained(
    model_id, torch_dtype=torch.float16,
).to(device)

prompt = "zack snyder style"
negative_prompt = "bad architecture, deformed, disfigured, poor details"

# Restyle the previous result (`image`) and keep the first returned image.
image_elden_ring = pipeline(prompt, negative_prompt=negative_prompt, image=image).images[0]

# 2x2 grid: source, mask, previous result, restyled result.
make_image_grid([init_image, mask_image, image, image_elden_ring], rows=2, cols=2)

In [None]:
# Release the pipeline: move it to the CPU, drop the `pipeline` name, then ask
# PyTorch to free its cached CUDA memory.
pipeline.to("cpu")
del pipeline
torch.cuda.empty_cache()