In [8]:
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL
from PIL import Image

from ip_adapter import IPAdapterPlus, IPAdapter
# --- Model locations and runtime settings -----------------------------------
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
vae_model_path = "stabilityai/sd-vae-ft-mse"
image_encoder_path = "models/image_encoder"
ip_ckpt = "models/ip-adapter-plus_sd15.bin"
device = "cuda"

# --- Scheduler --------------------------------------------------------------
# DDIM scheduler configured explicitly; it is handed to the pipeline below
# in place of whatever scheduler the base checkpoint ships with.
noise_scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    steps_offset=1,
    clip_sample=False,
    set_alpha_to_one=False,
)

# --- VAE + base pipeline ----------------------------------------------------
# The VAE is loaded from its own repository and cast to half precision,
# then passed into the pipeline explicitly.
vae = AutoencoderKL.from_pretrained(vae_model_path)
vae = vae.to(dtype=torch.float16)

# Safety checker and feature extractor are explicitly disabled.
pipe = StableDiffusionPipeline.from_pretrained(
    base_model_path,
    vae=vae,
    scheduler=noise_scheduler,
    torch_dtype=torch.float16,
    safety_checker=None,
    feature_extractor=None,
)






Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

In [11]:
import random
# Adapter variants offered in the UI dropdown; generate() dispatches on
# these exact strings.
ip_adapters_list = [
    'IP Adapter',
    'IP Adapter Plus',
]
def generate(ip_adapter, image,
             neg_image,
             scale,
             scale_start,
             scale_stop,
             neg_scale,
             scale_neg_start,
             scale_neg_stop,
             prompt,
             negative_prompt,
             num_samples,
             steps,
             seed):
    """Run one generation request with a positive and/or negative image prompt.

    Args:
        ip_adapter: Adapter variant name; must be 'IP Adapter' or
            'IP Adapter Plus'.
        image: Positive image prompt, or None. When None, ``scale`` is forced
            to 0 and a placeholder image is loaded from
            "assets/images/statue.png" so the adapter still receives an image.
        neg_image: Negative image prompt, forwarded as ``negative_pil_image``.
        scale: Weight for the image prompt.
        scale_start, scale_stop: Forwarded unchanged to ``ip_model.generate``
            (presumably the fraction of the schedule over which the image
            prompt is active -- confirm against the adapter implementation).
        neg_scale: Weight for the negative image prompt (forwarded as
            ``scale_neg``).
        scale_neg_start, scale_neg_stop: Forwarded unchanged to
            ``ip_model.generate``.
        prompt: Text prompt.
        negative_prompt: Negative text prompt.
        num_samples: Number of images requested.
        steps: Number of inference steps.
        seed: RNG seed; -1 selects a random seed in [0, 10**12].

    Returns:
        Whatever ``ip_model.generate`` returns.

    Raises:
        ValueError: If ``ip_adapter`` is not one of the known variant names.
    """
    if image is None:
        # No positive image prompt: neutralise its weight and substitute a
        # placeholder image.
        scale = 0
        image = Image.open("assets/images/statue.png")

    if seed == -1:
        # randint requires integer bounds: a float bound such as 1e12 is
        # deprecated since Python 3.10 and raises TypeError on 3.12+.
        seed = random.randint(0, 10**12)

    # NOTE: an adapter wrapper is constructed on every call.
    if ip_adapter == 'IP Adapter':
        # load ip-adapter
        ip_ckpt = "models/ip-adapter_sd15.bin"
        ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)
    elif ip_adapter == 'IP Adapter Plus':
        # load ip-adapter
        ip_ckpt = "models/ip-adapter-plus_sd15.bin"
        ip_model = IPAdapterPlus(pipe, image_encoder_path, ip_ckpt, device, num_tokens=16)
    else:
        raise ValueError('IP adapter %s is undefined.' % ip_adapter)

    images = ip_model.generate(
        pil_image=image,
        negative_pil_image=neg_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_samples=num_samples,
        scale=scale,  # weight for image prompt
        scale_start=scale_start,
        scale_stop=scale_stop,
        scale_neg=neg_scale,  # weight for negative image prompt
        scale_neg_start=scale_neg_start,
        scale_neg_stop=scale_neg_stop,
        num_inference_steps=steps,
        seed=seed,
    )
    print(images)
    return images


# Script injected into the page <head> (passed as `head=` to gr.Blocks).
# Ctrl+Enter clicks the element with id "generate-button".
# The handler reads the event from its own parameter `e` rather than the
# deprecated implicit global `window.event`.
shortcut_js = """
<script>
function shortcuts(e) {

    if ((e.keyCode == 10 || e.keyCode == 13) && e.ctrlKey) {
        document.getElementById("generate-button").click();
    }
}
document.addEventListener('keyup', shortcuts, false);
</script>
"""

import gradio as gr
# Gradio UI: left column holds the positive/negative prompt controls and the
# advanced options; right column holds the Generate button and output gallery.
with gr.Blocks(head=shortcut_js) as demo:
    # Hidden file component; not wired to any event in this cell.
    resized_image_file = gr.File(visible=False)

    gr.Markdown("# Stable Diffusion Negative Image Prompt")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Positive prompt controls
                with gr.Column():
                    prompt = gr.Textbox(label="Prompt", value="a man")
                    scale = gr.Slider(0, 2, value=1, label="Image prompt weight")
                    scale_start = gr.Slider(0, 1, value=0, label="Starting step")
                    scale_stop = gr.Slider(0, 1, value=1, label="Ending step")
                    imagePrompt = gr.Image(label="Image Prompt", type="pil")
                # Negative prompt controls
                with gr.Column():
                    negativePrompt = gr.Textbox(label="Negative Prompt", value="nsfw")
                    negativeScale = gr.Slider(0, 2, value=1, label="Negative Image prompt weight")
                    negativeScale_start = gr.Slider(0, 1, value=0, label="Starting step")
                    negativeScale_stop = gr.Slider(0, 1, value=1, label="Ending step")
                    negativeImagePrompt = gr.Image(label="Negative Image Prompt", type="pil")
            with gr.Accordion(label="Advanced options", open=False):
                ip_adapter = gr.Dropdown(choices=ip_adapters_list, value=ip_adapters_list[0], label="IP Adapter", interactive=True)
                numImages = gr.Number(precision=0, label="Number of images", value=1)
                steps = gr.Number(precision=0, label="Number of steps", value=25)
                seed = gr.Number(precision=0, label="Seed", value=-1)  # -1 => random seed in generate()
        with gr.Column():
            # elem_id is the hook used by the Ctrl+Enter shortcut script.
            btn = gr.Button("Generate", elem_id="generate-button")
            imagesOut = gr.Gallery(object_fit="contain", height="auto", selected_index=0, preview=True)

    # Order must match the positional parameters of generate().
    inputs = [ip_adapter, imagePrompt, negativeImagePrompt, scale, scale_start, scale_stop, negativeScale, negativeScale_start, negativeScale_stop, prompt, negativePrompt, numImages, steps, seed]
    outputs = imagesOut
    btn.click(generate, inputs=inputs, outputs=outputs)

# Launch once the Blocks context has been closed. debug=True keeps the cell
# running until interrupted.
# NOTE(review): server_name="0.0.0.0" listens on all network interfaces --
# confirm this exposure is intended.
demo.queue().launch(debug=True, share=False, inline=False, show_error=True, server_name="0.0.0.0")

Running on local URL:  http://0.0.0.0:7860

To create a public link, set `share=True` in `launch()`.


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7DBFE80>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7FE6380>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2F130>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2F2B0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2D180>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2EC80>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2E1D0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2F4C0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2FB80>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAD660>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CADC00>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAE950>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAFE20>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAE830>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAC940>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAC3A0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAD300>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAD4B0>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D28EE0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2AAD0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2A410>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D28D60>]


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3E590>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3CE50>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3DA80>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3D510>]


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3EDA0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3EEF0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3ECE0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C3F3A0>]


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C20640>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C235B0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C22860>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23250>]


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C22170>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23610>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23160>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C223B0>]


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C21810>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23F70>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23F10>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C23B50>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C224A0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C213F0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C21660>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C22BF0>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7CAE110>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C62C50>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C61BA0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C61E70>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C62290>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C621D0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C61AE0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7C62DA0>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7DA7FA0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7DA6680>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7DA6320>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7DA7910>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D29510>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D297B0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2ADD0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D28850>]
use negative image


  0%|          | 0/25 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2FFA0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2FA90>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2E470>, <PIL.Image.Image image mode=RGB size=512x512 at 0x1B3D7D2F820>]
Keyboard interruption in main thread... closing server.
