In [1]:
!pip install diffusers accelerate safetensors transformers gradio==3.37.0 colab-xterm
%load_ext colabxterm

Collecting diffusers
  Downloading diffusers-0.32.1-py3-none-any.whl.metadata (18 kB)
Collecting gradio==3.37.0
  Downloading gradio-3.37.0-py3-none-any.whl.metadata (17 kB)
Collecting colab-xterm
  Downloading colab_xterm-0.2.0-py3-none-any.whl.metadata (1.2 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio==3.37.0)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting aiohttp~=3.0 (from gradio==3.37.0)
  Downloading aiohttp-3.11.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting fastapi (from gradio==3.37.0)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio==3.37.0)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client>=0.2.10 (from gradio==3.37.0)
  Downloading gradio_client-1.5.3-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx (from gradio==3.37.0)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from g

# Manual inference

In [None]:
import PIL
import requests
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Build the InstructPix2Pix pipeline in half precision with the safety checker disabled.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    safety_checker=None,
)
# Move the pipeline to the GPU, then replace its scheduler with an Euler-ancestral
# one created from the existing scheduler's configuration.
pipe.to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Sample input image used for the manual run below.
url = "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg"
def download_image(url, timeout=30):
    """Download an image over HTTP and return it as an upright RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The decoded image, rotated according to its EXIF orientation tag and
        converted to RGB.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    import io

    # A bare `import PIL` does not by itself load these submodules; import them
    # explicitly instead of relying on another library having done so.
    import PIL.Image
    import PIL.ImageOps

    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error rather than letting PIL try to decode an error page.
    response.raise_for_status()

    image = PIL.Image.open(io.BytesIO(response.content))
    image = PIL.ImageOps.exif_transpose(image)
    image = image.convert("RGB")
    return image
# Fetch the sample image and apply a single edit instruction to it.
image = download_image(url)

prompt = "turn him into cyborg"
images = pipe(
    prompt,
    image=image,
    num_inference_steps=10,
    image_guidance_scale=1,
).images
# Last expression of the cell: show the first generated image.
images[0]

# Gradio implementation

In [None]:
import gradio as gr
from IPython import get_ipython
from IPython.display import display
import PIL
import requests
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
import os

# Build the same InstructPix2Pix pipeline as the manual-inference cell:
# half precision, safety checker disabled.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    safety_checker=None,
)
# Move to the GPU and switch to an Euler-ancestral scheduler built from the
# current scheduler's configuration.
pipe.to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Define a function for downloading and saving images
def download_and_save_image(url, filename, timeout=30):
    """Download an image, normalise it, and write it to ``filename``.

    Args:
        url: Address of the image to fetch.
        filename: Path the normalised image is saved to.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        ``filename``, unchanged, so the result can be used directly as a path.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    import io

    # A bare `import PIL` does not by itself load these submodules; import them
    # explicitly instead of relying on another library having done so.
    import PIL.Image
    import PIL.ImageOps

    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error rather than letting PIL try to decode an error page.
    response.raise_for_status()

    image = PIL.Image.open(io.BytesIO(response.content))
    image = PIL.ImageOps.exif_transpose(image)  # honour the EXIF orientation tag
    image = image.convert("RGB")
    image.save(filename)
    return filename

# Define the main inference function for the Gradio app
def inference(image, prompt):
    """Gradio callback: edit ``image`` according to the instruction in ``prompt``.

    Args:
        image: PIL image from the ``gr.Image(type="pil")`` input; ``None`` is
            rejected with a user-facing error.
        prompt: Edit instruction passed to the pipeline.

    Returns:
        The first image produced by the pipeline. The same image is also
        written to ``output.png`` in the working directory.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Report a readable message in the UI instead of failing inside the pipeline.
        raise gr.Error("Please upload an image before submitting.")

    images = pipe(prompt, image=image, num_inference_steps=10, image_guidance_scale=1).images
    images[0].save("output.png")
    return images[0]

# Save the sample photo under two filenames, one per example row, and keep the paths.
example_image_1, example_image_2 = (
    download_and_save_image(
        "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg",
        target,
    )
    for target in ("example1.jpg", "example2.jpg")
)

# Assemble the demo UI: an image plus a one-line prompt in, an edited image out.
iface = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=1, placeholder="Enter your prompt here..."),
    ],
    outputs=gr.Image(type="pil"),
    title="Instruct Pix2Pix Demo",
    description="Turn your images into something new with Instruct Pix2Pix!",
    examples=[
        # Example rows point at the saved files by path, not at image objects.
        [example_image_1, "turn him into cyborg"],
        [example_image_2, "make him a robot"],
    ],
)

# Start the app with sharing and debug mode both enabled.
iface.launch(share=True, debug=True)

# ControlNet

In [None]:
# Clone the upstream ControlNet repository into the current working directory.
# NOTE(review): no visible cell imports anything from this checkout (the
# ControlNet cell below loads its models through diffusers) — confirm whether
# the clone is still needed.
!git clone https://github.com/lllyasviel/ControlNet.git

In [None]:
# Invoke the `%xterm` magic provided by the colabxterm extension loaded in the
# first cell (`%load_ext colabxterm`).
# NOTE(review): presumably opens an interactive terminal for manual commands;
# whatever is typed there is not recorded in the notebook — TODO document it.
%xterm

In [None]:
# !pip install opencv-python transformers accelerate
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
from diffusers.utils import load_image
import numpy as np
import torch

import cv2
from PIL import Image

# Text conditioning for the generation step.
prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
negative_prompt = "low quality, bad quality, sketches"

# Fetch the picture whose edges will steer the generation.
image = load_image(
    "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
)

# Weight given to the ControlNet conditioning (noted by the author as
# recommended for good generalization).
controlnet_conditioning_scale = 0.5

# Load the canny ControlNet, the fp16-fix VAE and the SDXL base pipeline, all in half precision.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()

# get canny image
def _canny_condition(source, low_threshold=100, high_threshold=200):
    """Return a three-channel PIL edge map of ``source`` for ControlNet conditioning.

    Args:
        source: Image convertible with ``np.array`` (here the PIL image loaded above).
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A PIL image whose three channels all hold the same edge map.
    """
    edges = cv2.Canny(np.array(source), low_threshold, high_threshold)
    # The edge map is 2-D; add a channel axis and replicate it three times.
    edges = edges[:, :, None]
    return Image.fromarray(np.concatenate([edges, edges, edges], axis=2))


# Keep the source picture and the edge map under separate names instead of
# rebinding `image` through several unrelated types.
canny_image = _canny_condition(image)

# generate image
image = pipe(
    prompt,
    negative_prompt=negative_prompt,  # defined with the prompt above but previously never passed
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
).images[0]
# Last expression of the cell: display the generated image.
image