<a href="https://colab.research.google.com/github/ritwikraha/computer-needs-glasses/blob/master/image-generation/Image_Outpainting_with_ControlNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image Outpainting

Adapted from this comment: https://github.com/huggingface/diffusers/discussions/7482

In [None]:
!pip install diffusers -qq
!pip install accelerate -qq

In [None]:
import random
from io import BytesIO

import requests
import torch
from PIL import Image

from diffusers import (
    AutoPipelineForImage2Image,
    ControlNetModel,
    DPMSolverMultistepScheduler,
    StableDiffusionXLControlNetPipeline,
)
from diffusers.image_processor import IPAdapterMaskProcessor
from diffusers.utils import load_image, logging
from diffusers.utils.logging import set_verbosity


In [None]:
# Only log errors from diffusers, so the cross_attention_kwargs warnings
# emitted by AttnProcessor2_0 are not shown.
logging.set_verbosity(logging.ERROR)

In [None]:
# Load the ControlNet weights in half precision (fp16 variant).
controlnet = ControlNetModel.from_pretrained(
    "OzzyGT/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"
)

# Build the SDXL ControlNet pipeline around that ControlNet and move it to CUDA.
pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
    "RunDiffusion/Juggernaut-XL-v9",
    torch_dtype=torch.float16,
    variant="fp16",
    controlnet=controlnet,
).to("cuda")

# Swap in a DPM-Solver multistep scheduler. Karras sigmas are enabled through
# the from_config override kwarg so the option is part of the scheduler's
# config from construction, rather than being assigned onto the config
# object afterwards.
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config, use_karras_sigmas=True
)

# Attach the IP-Adapter weights and set the adapter scale to 0.4.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter-plus_sdxl_vit-h.safetensors",
    image_encoder_folder="models/image_encoder",
)
pipeline.set_ip_adapter_scale(0.4)

In [None]:
# Reference image that is later passed to the pipelines as `ip_adapter_image`.
ip_wolf_image = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/outpainting/ip_wolf_source.png?download=true"
)
# TODO: Change this to get the mask from SAM
ip_mask = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/outpainting/wolf_position_mask.png?download=true"
)

# Preprocess the mask at the 1024x1024 resolution used for generation.
processor = IPAdapterMaskProcessor()
ip_masks = processor.preprocess(ip_mask, height=1024, width=1024)

# TODO: Change this to get the ControlNet image from a function
response = requests.get(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/outpainting/313891870-adb6dc80-2e9e-420c-bac3-f93e6de8d06b.png?download=true",
    timeout=60,  # do not hang the notebook forever on a stalled connection
)
# Fail here with a clear HTTP error instead of letting PIL choke on an error page.
response.raise_for_status()

# alpha_composite needs an RGBA source, so enforce the mode explicitly.
control_image = Image.open(BytesIO(response.content)).convert("RGBA")
# Composite the control image over an opaque white canvas of the same size.
new_controlnet_image = Image.new("RGBA", control_image.size, "WHITE")
new_controlnet_image.alpha_composite(control_image)

prompt = "high quality photo of a wolf playing basketball, highly detailed, professional, dramatic ambient light, cinematic, dynamic background, focus"
negative_prompt = ""

In [None]:
# Draw a random 32-bit seed and use it to seed a CPU generator.
seed = random.randint(0, 2**32 - 1)
generator = torch.Generator(device="cpu")
generator.manual_seed(seed)

# Arguments for the ControlNet pass; the output is requested as latents
# rather than a decoded image.
controlnet_pass_args = {
    "prompt": prompt,
    "negative_prompt": negative_prompt,
    "height": 1024,
    "width": 1024,
    "guidance_scale": 6.5,
    "num_inference_steps": 25,
    "generator": generator,
    "image": new_controlnet_image,
    "controlnet_conditioning_scale": 0.9,
    "control_guidance_end": 0.9,
    "ip_adapter_image": ip_wolf_image,
    "cross_attention_kwargs": {"ip_adapter_masks": ip_masks},
    "output_type": "latent",
}

latents = pipeline(**controlnet_pass_args).images[0]

In [None]:
# Derive an image-to-image pipeline from the existing pipeline, without the ControlNet.
pipeline_img2img = AutoPipelineForImage2Image.from_pipe(pipeline, controlnet=None)

prompt = "cinematic film still of a wolf playing basketball, highly detailed, high budget hollywood movie, cinemascope, epic, gorgeous, film grain"

# Second pass: feed the latents from the previous step through img2img at
# strength 0.2, with the same IP-Adapter image and masks.
img2img_args = {
    "prompt": prompt,
    "negative_prompt": negative_prompt,
    "guidance_scale": 3.0,
    "num_inference_steps": 30,
    "generator": generator,
    "image": latents,
    "strength": 0.2,
    "ip_adapter_image": ip_wolf_image,
    "cross_attention_kwargs": {"ip_adapter_masks": ip_masks},
}
img2img_output = pipeline_img2img(**img2img_args)
image = img2img_output.images[0]

# Write the final image to disk.
image.save("result.png")