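# Stable Diffusion experiments

Image-to-image generation with a Stable Diffusion pipeline, followed by Canny-edge-conditioned generation with ControlNet.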

## Environment setup

In [None]:
# Query the NVIDIA driver to confirm a GPU is visible to this runtime
# (the pipelines below are loaded in float16 and sent to 'cuda').
! nvidia-smi

### Installations

In [None]:
! pip install -q diffusers
! pip install -q transformers accelerate
! pip install -q torch torchvision -U
! pip install -q xformers git+https://github.com/huggingface/accelerate.git
! pip install -q opencv-contrib-python
! pip install -q controlnet_aux

### Imports

In [None]:
import os
import cv2
import torch
import tempfile

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
from diffusers.utils import load_image

In [None]:
# Display the torch version actually imported (the install cell upgrades torch with -U).
torch.__version__

### Global configuration variables

In [None]:
# Default directory that `load` reads input images from.
load_loc = 'templates'
# Default directory that `save` writes generated images to.
save_loc = 'outputs'

# When True, the final cell of the notebook deletes the generated outputs.
clear_workspace = False

In [None]:
# Create the input and output directories if they are missing.
# exist_ok=True keeps the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
for directory in (load_loc, save_loc):
  os.makedirs(directory, exist_ok = True)

### Stable Diffusion setup

In [None]:
# Checkpoint used by both the img2img and the ControlNet pipelines below.
model_id_or_path = 'Ojimi/anime-kawai-diffusion'
# Full-precision alternative (disabled):
# torch_dtype = torch.float32

# Alternative checkpoint (disabled):
# model_id_or_path = "runwayml/stable-diffusion-v1-5"
# dtype passed as torch_dtype to every from_pretrained() call below.
torch_dtype=torch.float16

In [None]:
# Image-to-image pipeline: load the checkpoint with the configured dtype,
# then move the whole pipeline onto the GPU.
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
  model_id_or_path,
  torch_dtype = torch_dtype,
).to('cuda')

### ControlNet setup

In [None]:
# ControlNet weights for Canny-edge conditioning, loaded with the same dtype
# as the base model.
controlnet = ControlNetModel.from_pretrained(
  'lllyasviel/sd-controlnet-canny',
  torch_dtype = torch_dtype,
)

# Same base checkpoint as the img2img pipeline, with the ControlNet attached.
cn_pipe = StableDiffusionControlNetPipeline.from_pretrained(
  model_id_or_path,
  controlnet = controlnet,
  torch_dtype = torch_dtype,
)

In [None]:
# Swap the pipeline's scheduler for UniPC, built from the current scheduler's config.
cn_pipe.scheduler = UniPCMultistepScheduler.from_config(cn_pipe.scheduler.config)

In [None]:
# Turn on diffusers' model CPU offloading for the ControlNet pipeline.
# Unlike sd_pipe, cn_pipe is never moved to 'cuda' explicitly in this notebook.
cn_pipe.enable_model_cpu_offload()

In [None]:
# xformers attention is an optional memory optimisation: carry on without it,
# but report why it could not be enabled instead of silently discarding the
# error. Catching Exception (not a bare except) lets KeyboardInterrupt through.
try:
  cn_pipe.enable_xformers_memory_efficient_attention()
except Exception as error:
  print(f'xformers memory-efficient attention not enabled: {error}')

### Helper functions

In [None]:
def generate_temp_name():
  """Return a random file name (without any directory part).

  The name is borrowed from a short-lived ``tempfile.NamedTemporaryFile``.
  It is used by ``save`` as the stem of the output image file.

  Returns:
    str: the base name of the temporary file.
  """
  # The context manager closes (and thereby deletes) the placeholder file
  # immediately instead of leaving that to garbage collection.
  with tempfile.NamedTemporaryFile() as temp:
    # basename() handles the platform's path separator; split('/') would
    # return the whole path on Windows.
    return os.path.basename(temp.name)

In [None]:
def process(input_image, pipe, prompt, **kwargs):
  """Run ``pipe`` on an image and a prompt and return the generated images.

  Args:
    input_image: forwarded to ``pipe`` as the ``image`` keyword argument.
    pipe: callable whose result exposes an ``images`` attribute.
    prompt: forwarded to ``pipe`` as the ``prompt`` keyword argument.
    **kwargs: any further keyword arguments, forwarded to ``pipe`` unchanged.

  Returns:
    The ``images`` attribute of the object returned by ``pipe``.
  """
  result = pipe(image = input_image, prompt = prompt, **kwargs)
  return result.images

In [None]:
def load(image_name, image_path = load_loc, shape = (400, 500), format = 'RGB'):
  """Open an image file, convert its mode and resize it.

  Args:
    image_name: file name, joined onto ``image_path``.
    image_path: directory containing the file (defaults to ``load_loc``).
    shape: size passed to ``Image.resize``.
    format: mode passed to ``Image.convert``.

  Returns:
    The converted and resized PIL image.
  """
  full_path = os.path.join(image_path, image_name)

  # The with-block closes the underlying file deterministically. convert() and
  # resize() each return a new image, so the result stays usable afterwards.
  with Image.open(full_path) as source:
    return source.convert(format).resize(shape)

In [None]:
def save(images, save_loc = save_loc):
  """Save the first image of ``images`` as a PNG with a random file name.

  Args:
    images: sequence of images; only ``images[0]`` is written.
    save_loc: target directory (defaults to the module-level ``save_loc``).
      It is created if it does not exist yet.

  Returns:
    str: path of the file that was written.

  Raises:
    IndexError: if ``images`` is empty.
  """
  temp_name = generate_temp_name()
  image_loc = os.path.join(save_loc, f'{temp_name}.png')

  # Create the directory on demand so a custom save_loc works without a
  # separate setup step. An empty save_loc means the current directory.
  if save_loc:
    os.makedirs(save_loc, exist_ok = True)

  images[0].save(image_loc)

  return image_loc

In [None]:
def canny_image(image, threshold):
  """Build a 3-channel Canny edge image from ``image``.

  Args:
    image: input image; it is converted with ``np.array`` before edge detection.
    threshold: pair ``(low, high)`` handed to ``cv2.Canny``.

  Returns:
    A PIL image whose three channels all hold the same edge map.
  """
  pixels = np.array(image)
  low, high = threshold

  edges = cv2.Canny(pixels, low, high)

  # Stack the 2-D edge map three times along a new last axis.
  edges_rgb = np.stack([edges, edges, edges], axis = 2)

  return Image.fromarray(edges_rgb)

In [None]:
def plot(images, title):
  """Show the first image of ``images`` under ``title`` with the axes hidden.

  Args:
    images: sequence of images; only ``images[0]`` is displayed.
    title: text used as the plot title.
  """
  axes = plt.gca()
  axes.set_title(title)
  plt.imshow(images[0])
  axes.axis('off')
  plt.show()

## Experiments

### Stable Diffusion

In [None]:
# File name of the template; `load` joins it onto its default directory (load_loc).
image_name = 'img_6.jpg'

# `shape` is handed to Image.resize inside `load`.
input_image = load(image_name, shape = (400, 600))
# `plot` shows the first element of the sequence it is given, hence the list.
plot([input_image], title = 'Original')

In [None]:
# Prompt shared by the img2img run and the ControlNet run below.
prompt = "Upper body, official art, slim, beautiful, aesthetic, looking at viewer, dynamic pose, perfect angle, long hair, petite, beautiful skin, background, beautiful detailed eyes, muted color, depth of field, volumetric lighting, reflection, finest detail, ultra detailed, perfect face, epic proportion, epic composition, professional work"

In [None]:
# Passed as `negative_prompt` to both pipeline runs below.
negative_prompt = 'lowres, bad anatomy'

In [None]:
# img2img run on the loaded template. No generator/seed is passed to the
# pipeline, so the result is not reproducible from run to run.
images = process(input_image, sd_pipe, prompt, negative_prompt = negative_prompt, strength = 0.75, guidance_scale = 3.5)
# `save` returns the path of the written file, not the image itself.
output_image = save(images)

print(output_image)
# os.path.basename (rather than split('/')) gives the bare file name on any OS.
plot(images, title = os.path.basename(output_image))

### ControlNet

In [None]:
# Optional (disabled): use a remote sample image as `input_image` instead of the
# local template. Uncommenting this is the only use of the imported `load_image`
# visible in this notebook.
# input_image = load_image(
#     "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
# )
# input_image

In [None]:
# (low, high) pair that canny_image unpacks and hands to cv2.Canny.
threshold = (100, 200)

# Edge map of `input_image` (loaded in the Stable Diffusion section above);
# it is the image given to the ControlNet pipeline in the next cell.
image = canny_image(input_image, threshold)
image

In [None]:
# ControlNet run conditioned on the Canny edge image. No generator/seed is
# passed to the pipeline, so the result is not reproducible from run to run.
images = process(image, cn_pipe, prompt, negative_prompt = negative_prompt, num_inference_steps = 20)
# `save` returns the path of the written file, not the image itself.
output_image = save(images)

print(output_image)
# os.path.basename (rather than split('/')) gives the bare file name on any OS.
plot(images, title = os.path.basename(output_image))

In [None]:
# Optional clean-up, controlled by the clear_workspace flag.
# Uses save_loc instead of a hard-coded 'outputs' path, and plain Python
# instead of `! rm` so it also works where no POSIX shell is available.
if clear_workspace:
  for file_name in os.listdir(save_loc):
    file_path = os.path.join(save_loc, file_name)
    # As with `rm outputs/*`: dot-files are skipped and sub-directories are left alone.
    if not file_name.startswith('.') and os.path.isfile(file_path):
      os.remove(file_path)

## Resources