<a href="https://colab.research.google.com/github/khaerensml6/uva_exercise/blob/main/uva_diffusion_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone -n https://github.com/khaerensml6/uva_exercise.git --depth 1
!cd uva_exercise; git checkout HEAD clip_client.py
!cd uva_exercise; mv clip_client.py ..


# Data exploration: LAION-5B



In [None]:
import math
from PIL import Image


### Some helper functions


In [None]:

def crop_image(original_image, size=512):
    """Resize an image so its shorter side equals ``size``, then center-crop a square.

    Args:
        original_image: Image object exposing ``size`` (width, height),
            ``resize`` and ``crop`` (e.g. a PIL image).
        size: Edge length in pixels of the square result.

    Returns:
        The ``size`` x ``size`` region cut from the middle of the resized image.
    """
    src_w, src_h = original_image.size

    # Scale the shorter side to `size`; the longer side follows the aspect ratio.
    if src_w < src_h:
        target_dims = (size, int(size * src_h / src_w))
    else:
        target_dims = (int(size * src_w / src_h), size)
    scaled = original_image.resize(target_dims)

    # Box of edge length `size` centered inside the resized image.
    scaled_w, scaled_h = scaled.size
    x0, y0 = (scaled_w - size) // 2, (scaled_h - size) // 2
    x1, y1 = (scaled_w + size) // 2, (scaled_h + size) // 2

    return scaled.crop((x0, y0, x1, y1))

def image_grid(imgs, size=512):
    """Arrange images into one grid image made of square tiles.

    Every image is center-cropped to ``size`` x ``size`` with ``crop_image``.
    Up to four images are placed in a single row; more than four are placed in
    rows of four, filled left to right and top to bottom.

    Args:
        imgs: Sequence of PIL images.
        size: Edge length in pixels of each tile. Defaults to 512 so the grid
            can be built with ``image_grid(imgs)``.

    Returns:
        A new RGB PIL image of ``cols * size`` by ``rows * size`` pixels.
    """
    max_cols = 4
    if len(imgs) > max_cols:
        rows = math.ceil(len(imgs) / max_cols)
        cols = max_cols
    else:
        rows = 1
        cols = len(imgs)

    tiles = [crop_image(img, size) for img in imgs]

    grid = Image.new('RGB', size=(cols * size, rows * size))
    for i, tile in enumerate(tiles):
        # Column index is i % cols, row index is i // cols.
        grid.paste(tile, box=(i % cols * size, i // cols * size))
    return grid

# Query Parameters

In [None]:
# Number of results requested from the LAION search client.
number_of_images = 20
aesthetic_score = 9 # number from 1-10: "prettiness" of the images to retrieve
# Passed to the client as `aesthetic_weight`; presumably controls how strongly
# the aesthetic score influences the ranking — TODO confirm in clip_client.py.
aesthetic_weight = 0.5

In [None]:
from clip_client import ClipClient, Modality

# Client for the LAION kNN search service, configured with the query parameters
# defined above and set to return image results.
laion_client = ClipClient(
    url="https://knn.laion.ai/knn-service",
    indice_name="laion5B-L-14",
    num_images=number_of_images,
    aesthetic_score=aesthetic_score,
    aesthetic_weight=aesthetic_weight,
    modality=Modality.IMAGE,
)

## Querying LAION with text

In [None]:
# Free-text search query sent to the LAION client in the next cell.
query_string = "cat with hat"

In [None]:
# Run the text query; the download cell below reads "url" and "caption"
# from each returned entry.
retrieved_images = laion_client.query(text=query_string)
print(f"Found {len(retrieved_images)} image urls!")

In [None]:
from PIL import Image
import requests
import io

# Download every retrieved result. Links that fail to download or decode are
# reported and skipped so one bad URL does not abort the whole cell.
images = []
for i, response in enumerate(retrieved_images):
  image_url = response["url"]
  try:
    # A timeout keeps a single unresponsive host from hanging the cell.
    http_response = requests.get(image_url, timeout=10)
    # Raise on HTTP error statuses instead of parsing an error page as an image.
    http_response.raise_for_status()

    image = Image.open(io.BytesIO(http_response.content))
    # Image.open is lazy: decode now so corrupt data is rejected here rather
    # than later when the grid is built.
    image.load()
    images.append(image)

    print(f"Found image {i} with caption: \n\t \"{response['caption']}\"\n")
  except Exception as e:
    # Best-effort download: keep going, but show why this entry was skipped.
    print(f"encountered a dead link for image {i}! ({type(e).__name__}: {e})\n")

In [None]:
# Show the downloaded images as a grid of 512-pixel tiles.
image_grid(images, 512)

## Querying LAION with an image

In [None]:

# download a sample query image:
# or comment out the line below and upload your own image under the name "query_image.png"
!curl "https://media.istockphoto.com/photos/paghetti-with-tomato-sauce-capers-and-olives-picture-id696166506?k=6&amp;m=696166506&amp;s=612x612&amp;w=0&amp;h=-hV4BZr3ekV0tJQ2x-vg_sSQKXm7qaqzDgl8fDEp9NE=" > query_image.png
query_image = Image.open("query_image.png")
query_image


In [None]:

# Ask for 30 results for the image query by setting num_images on the existing client.
# NOTE(review): assumes ClipClient reads num_images at query time — confirm in clip_client.py.
laion_client.num_images=30
# Query with the image file saved by the previous cell instead of a text string.
retrieved_images = laion_client.query(image="query_image.png")
print(f"Found {len(retrieved_images)} images")


In [None]:
# Download every result of the image query. Links that fail to download or
# decode are reported and skipped so one bad URL does not abort the whole cell.
images = []
for i, response in enumerate(retrieved_images):
  image_url = response["url"]
  try:
    # A timeout keeps a single unresponsive host from hanging the cell.
    http_response = requests.get(image_url, timeout=10)
    # Raise on HTTP error statuses instead of parsing an error page as an image.
    http_response.raise_for_status()

    image = Image.open(io.BytesIO(http_response.content))
    # Image.open is lazy: decode now so corrupt data is rejected here rather
    # than later when the grid is built.
    image.load()
    images.append(image)

    print(f"Found image {i} with caption: \n\t \"{response['caption']}\"\n")
  except Exception as e:
    # Best-effort download: keep going, but show why this entry was skipped.
    print(f"encountered a dead link for image {i}! ({type(e).__name__}: {e})\n")

In [None]:
# Show the downloaded images as a grid of 512-pixel tiles.
image_grid(images, 512)

# Generating images using StableDiffusion

First, make sure we're using a GPU:



In [None]:
# Print the NVIDIA GPU status for this runtime.
!nvidia-smi

If no GPU is listed, you can switch to a GPU runtime using the menu in the upper right corner.

### Installing dependencies:

In [None]:
# %pip installs into the environment of the running kernel.
# NOTE(review): versions are not pinned, and mediapy is not referenced by any
# cell visible in this notebook — confirm whether it is still needed.
%pip install --quiet --upgrade diffusers transformers accelerate mediapy

In [None]:
from diffusers import AutoPipelineForText2Image
import torch

# Load the SDXL-Turbo text-to-image pipeline from its fp16 safetensors weights
# and move it to the GPU in the same statement.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda")

In [None]:
prompt = "Analog photograph of a cat with a hat"

num_inference_steps = 4
# Note: this rebinds the `number_of_images` name used for the LAION query above.
number_of_images = 4

# Generate one image per copy of the prompt, all in a single batch.
images = pipe(
    prompt=[prompt] * number_of_images,
    guidance_scale=0.5,
    num_inference_steps=num_inference_steps,
).images

image_grid(images, 512)

# Using ControlNet

In [None]:
!pip install -q opencv-contrib-python
!pip install -q controlnet_aux
!pip install xformers

### Some helper functions

In [None]:
import numpy as np
from transformers import pipeline

def create_depth_img(input_image):
    """Create a three-channel depth image from the input image.

    Runs the default ``depth-estimation`` pipeline from ``transformers`` on
    ``input_image`` and copies the resulting depth map into three identical
    channels.

    Args:
        input_image: Image to estimate depth for (passed straight to the
            depth-estimation pipeline).

    Returns:
        A PIL image whose three channels all hold the estimated depth map.
    """
    # Note: the estimator is built on every call.
    depth_estimator = pipeline('depth-estimation')

    depth = np.array(depth_estimator(input_image)['depth'])
    # Add a trailing channel axis, then repeat it three times along that axis.
    depth = depth[:, :, None]
    depth = np.concatenate([depth, depth, depth], axis=2)

    return Image.fromarray(depth)

### Download the conditioning image

In [None]:
from diffusers import StableDiffusionControlNetPipeline
from diffusers.utils import load_image


# Fetch the conditioning image from the given URL with diffusers' load_image helper.
image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
)
# Bare last expression: display the loaded image as the cell output.
image

In [None]:
# Depth map of the conditioning image; passed to the ControlNet pipeline further down.
depth_image = create_depth_img(image)
# Bare last expression: display the depth image as the cell output.
depth_image

In [None]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
import torch
from diffusers import UniPCMultistepScheduler

# Depth ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float16,
)
# Stable Diffusion v1.5 pipeline combined with the ControlNet above.
# Note: this rebinds `pipe`, replacing the SDXL-Turbo pipeline loaded earlier.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)




### Some optimization settings

In [None]:
# Replace the pipeline's scheduler with UniPCMultistepScheduler, built from the
# existing scheduler's configuration.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# NOTE(review): presumably keeps sub-models on the CPU and moves each to the GPU
# only while it is needed — confirm against the diffusers documentation.
pipe.enable_model_cpu_offload()


In [None]:
# One prompt per painter; every prompt is conditioned on the same depth image.
painters = ["Vincent Van Gogh", "Piet Mondriaan", "Pieter-paul Rubens", "Karel Appel"]
prompts = [f"a painting of a woman by {painter}" for painter in painters]

output = pipe(
    prompt=prompts,
    image=depth_image,
    # The same negative prompt is repeated once per prompt in the batch.
    negative_prompt=["monochrome, lowres, bad anatomy, worst quality, low quality"] * len(prompts),
    num_inference_steps=20,
)

image_grid(output.images, 512)
