dev_support_diffusers_ipa #837

Open · wants to merge 10 commits into main
Conversation

ccssu (Contributor) commented Apr 24, 2024

Install:

Step 1: pip install diffusers==0.27

Step 2: follow the OneDiff Installation Guide (https://github.com/siliconflow/onediff/blob/main/README_ENTERPRISE.md#install-onediff-enterprise).

Step 3: follow the OneDiffx Installation Guide (https://github.com/siliconflow/onediff/tree/main/onediff_diffusers_extensions#install-and-setup).
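
In practice, the three steps above reduce to roughly the following commands (a sketch assuming a source install of OneDiff and OneDiffx; the enterprise build described in the linked guide needs additional steps):

    pip install diffusers==0.27
    git clone https://github.com/siliconflow/onediff.git
    cd onediff && pip install -e .                        # OneDiff
    cd onediff_diffusers_extensions && pip install -e .   # OneDiffx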

Usage:

script_00.py
""" 
### Install
## Prepare environment

Complete the following environment setup before running this script.

- 1. [OneDiff Installation Guide](https://github.com/siliconflow/onediff/blob/main/README_ENTERPRISE.md#install-onediff-enterprise)
- 2. [OneDiffx Installation Guide](https://github.com/siliconflow/onediff/tree/main/onediff_diffusers_extensions#install-and-setup)

### Usage:
    python script_00.py \
        --model_id "stabilityai/stable-diffusion-xl-base-1.0" \
        --cache_dir "./cache" \
        --num_inference_steps 100 \
        --guidance_scale 7.5 \
        --seed 0 \
        --warmup_steps 3
"""
import os
import argparse
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.utils import load_image as _load_image
from onediffx import compile_pipe, load_pipe, save_pipe

def parse_arguments():
    parser = argparse.ArgumentParser(description="Text-to-Image Generation")
    parser.add_argument("--model_id", default="stabilityai/stable-diffusion-xl-base-1.0", help="Model ID")
    parser.add_argument("--cache_dir", default="./cache", help="Cache directory")
    parser.add_argument("--image", default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png", help="Image URL")
    parser.add_argument("--output", default="output.png", help="Output image filename")
    parser.add_argument("--prompt", default="a polar bear sitting in a chair drinking a milkshake", help="Prompt")
    parser.add_argument("--negative_prompt", default="deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality", help="Negative prompt")
    parser.add_argument("--num_inference_steps", type=int, default=100, help="Number of inference steps")
    parser.add_argument("--guidance_scale", type=float, default=7.5, help="Guidance scale")
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument("--warmup_steps", type=int, default=3, help="Number of warmup steps")
    return parser.parse_args()

def load_image(url, cache_dir="."):
    """Download an image once, then reuse the local copy on later runs."""
    file_name = url.split("/")[-1]
    file_path = os.path.join(cache_dir, file_name)
    if os.path.exists(file_path):
        image = _load_image(file_path)
    else:
        image = _load_image(url)
        image.save(file_path)
    return image

def main():
    # Parse arguments
    args = parse_arguments()
    # Load pre-trained pipeline
    pipe = AutoPipelineForText2Image.from_pretrained(args.model_id, torch_dtype=torch.float16).to("cuda")
    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
    pipe.set_ip_adapter_scale(0.6)  # how strongly the reference image steers generation

    # Load images
    image = load_image(args.image)

    # Set up random generator
    generator = torch.Generator(device="cuda").manual_seed(args.seed)

    # Compile and load pipeline
    pipe = compile_pipe(pipe)
    cache_path = os.path.join(args.cache_dir, type(pipe).__name__)
    if os.path.exists(cache_path):
        load_pipe(pipe, cache_path)

    # Run the pipeline; the extra warmup iterations absorb the one-time compilation cost
    for _ in range(args.warmup_steps + 1):
        result = pipe(
            prompt=args.prompt,
            ip_adapter_image=image,
            negative_prompt=args.negative_prompt,
            num_inference_steps=args.num_inference_steps,
            guidance_scale=args.guidance_scale,
            generator=generator,
        ).images[0]

    # Save the last generated image
    result.save(args.output)

    # Save compiled pipeline if not already cached
    if not os.path.exists(cache_path):
        os.makedirs(cache_path)
        save_pipe(pipe, cache_path)

if __name__ == "__main__":
    main()
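
Note: the first pipeline call after compile_pipe triggers OneDiff's graph compilation, which is what the extra warmup_steps iterations absorb. save_pipe then persists the compiled graph under ./cache/<pipeline class name>, and load_pipe restores it on later runs, skipping most of that warmup cost.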

ccssu (Contributor, Author) commented Apr 24, 2024

This example requires diffusers installed from source (0.28.0.dev0 at the time of writing):

pip install git+https://github.com/huggingface/diffusers.git

script_01.py
import os

import torch
from transformers import CLIPVisionModelWithProjection

from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
from diffusers.image_processor import IPAdapterMaskProcessor
from diffusers.utils import load_image as _load_image
from diffusers.utils import logging
from diffusers.utils.logging import set_verbosity


def load_image(url, cache_dir="."):
    """Download an image once, then reuse the local copy on later runs."""
    file_name = url.split("/")[-1]
    file_path = os.path.join(cache_dir, file_name)
    if os.path.exists(file_path):
        image = _load_image(file_path)
    else:
        image = _load_image(url)
        image.save(file_path)
    return image

set_verbosity(logging.ERROR)  # silence cross_attention_kwargs warnings emitted by AttnProcessor2_0

# load & process masks
composition_mask = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/1024_whole_mask.png"
)
female_mask = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter_None_20240321125641_mask.png"
)
male_mask = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter_None_20240321125344_mask.png"
)
background_mask = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter_6_20240321130722_mask.png"
)

print("--" * 20, "start", "--" * 20)
processor = IPAdapterMaskProcessor()
masks1 = processor.preprocess([composition_mask], height=1024, width=1024)
masks2 = processor.preprocess([female_mask, male_mask, background_mask], height=1024, width=1024)
masks2 = masks2.reshape(1, masks2.shape[0], masks2.shape[2], masks2.shape[3])  # output -> (1, 3, 1024, 1024)
masks = [masks1, masks2]

# load images
ip_composition_image = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter__20240321125152.png"
)
ip_female_style = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter__20240321125625.png"
)
ip_male_style = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter__20240321125329.png"
)
ip_background = load_image(
    "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/ip_adapter__20240321130643.png"
)


image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16
).to("cuda")

pipeline = StableDiffusionXLPipeline.from_pretrained(
    "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.float16, image_encoder=image_encoder, variant="fp16"
).to("cuda")

pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
pipeline.scheduler.config.use_karras_sigmas = True

pipeline.load_ip_adapter(
    ["ostris/ip-composition-adapter", "h94/IP-Adapter"],
    subfolder=["", "sdxl_models"],
    weight_name=[
        "ip_plus_composition_sdxl.safetensors",
        "ip-adapter_sdxl_vit-h.safetensors",
    ],
    image_encoder_folder=None,
)
pipeline.set_ip_adapter_scale([1.0, [0.75, 0.75, 0.3]])  # composition adapter scale, then per-mask scales for the style adapter

prompt = "high quality, cinematic photo, cinemascope, 35mm, film grain, highly detailed"
negative_prompt = "anime, cartoon"

# Compile the UNet with OneDiff for faster inference
from onediff.infer_compiler import oneflow_compile
pipeline.unet = oneflow_compile(pipeline.unet)

# Run the pipeline several times; the first call triggers compilation, later calls run at full speed
for _ in range(10):
    image = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        ip_adapter_image=[ip_composition_image, [ip_female_style, ip_male_style, ip_background]],
        cross_attention_kwargs={"ip_adapter_masks": masks},
        guidance_scale=6.5,
        num_inference_steps=25,
    ).images[0]

image.save("yiyi_test_mask_multi_out.png")
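
For reference, the nested arguments mirror each other: the first adapter (ip-composition) receives one reference image, the whole-frame mask in masks1, and scale 1.0, while the second adapter (IP-Adapter ViT-H) receives three style images, the three stacked masks in masks2, and the per-mask scales [0.75, 0.75, 0.3].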


ccssu requested a review from hjchen2 on Apr 29, 2024 03:24.
ccssu mentioned this pull request on Apr 29, 2024.
ccssu (Contributor, Author) commented May 21, 2024

Example: using the diffusers feature to set the IP-Adapter scale at runtime.

""" 
### Install
## Prepare environment

Complete the following environment setup before running this script.

- 1. [OneDiff Installation Guide](https://github.com/siliconflow/onediff/blob/main/README_ENTERPRISE.md#install-onediff-enterprise)
- 2. [OneDiffx Installation Guide](https://github.com/siliconflow/onediff/tree/main/onediff_diffusers_extensions#install-and-setup)

### Usage:
    python script_00.py \
        --model_id "stabilityai/stable-diffusion-xl-base-1.0" \
        --cache_dir "./cache" \
        --num_inference_steps 100 \
        --guidance_scale 7.5 \
        --seed 0 \
        --warmup_steps 3
"""
import os
import argparse
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers.utils import load_image as _load_image
from onediffx import compile_pipe, load_pipe, save_pipe

def parse_arguments():
    parser = argparse.ArgumentParser(description="Text-to-Image Generation")
    parser.add_argument("--model_id", default="stabilityai/stable-diffusion-xl-base-1.0", help="Model ID")
    parser.add_argument("--cache_dir", default="./cache", help="Cache directory")
    parser.add_argument("--image", default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png", help="Image URL")
    parser.add_argument("--output", default="output.png", help="Output image filename")
    parser.add_argument("--prompt", default="a polar bear sitting in a chair drinking a milkshake", help="Prompt")
    parser.add_argument("--negative_prompt", default="deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality", help="Negative prompt")
    parser.add_argument("--num_inference_steps", type=int, default=20, help="Number of inference steps")
    parser.add_argument("--guidance_scale", type=float, default=7.5, help="Guidance scale")
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument("--warmup_steps", type=int, default=3, help="Number of warmup steps")
    return parser.parse_args()

def load_image(url, cache_dir="."):
    file_name = url.split("/")[-1]
    file_path = os.path.join(cache_dir, file_name)
    if os.path.exists(file_path):
        image = _load_image(file_path)
    else:
        image = _load_image(url)
        image.save(file_path)
    return image

def main():
    # Parse arguments
    args = parse_arguments()
    # Load pre-trained pipeline
    pipe = StableDiffusionXLPipeline.from_pretrained(args.model_id, torch_dtype=torch.float16).to("cuda")
    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
    # Pass the scale as a CUDA tensor (rather than a Python float) so it can be
    # updated in place at runtime; see diffusers/loaders/ip_adapter.py (set_ip_adapter_scale).
    input_ipa_scale = torch.tensor(0.5).cuda()
    pipe.set_ip_adapter_scale(input_ipa_scale)
    # Load images
    image = load_image(args.image)

    # Set up random generator
    generator = torch.Generator(device="cuda").manual_seed(args.seed)

    # Compile and load pipeline
    pipe = compile_pipe(pipe)
    cache_path = os.path.join(args.cache_dir, type(pipe).__name__)
    if os.path.exists(cache_path):
        load_pipe(pipe, cache_path)

    # Sweep the IP-Adapter scale from 0.2 to 0.8 and generate one image per setting
    for scale in range(2, 10, 2):
        generator.manual_seed(args.seed)  # reset the generator so every scale starts from the same noise
        input_ipa_scale.copy_(torch.tensor(scale / 10))  # update the scale in place, without recompiling
        result = pipe(
            prompt=args.prompt,
            ip_adapter_image=image,
            negative_prompt=args.negative_prompt,
            num_inference_steps=args.num_inference_steps,
            guidance_scale=args.guidance_scale,
            generator=generator,
        ).images[0]

        # Save the image generated at this scale
        output_file = f"_{scale}.png"
        print(f"{output_file=}")
        result.save(output_file)

    # Save compiled pipeline if not already cached
    if not os.path.exists(cache_path):
        os.makedirs(cache_path)
        save_pipe(pipe, cache_path)

if __name__ == "__main__":
    main()
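
The key point of this example is that set_ip_adapter_scale receives a CUDA tensor rather than a Python float: input_ipa_scale.copy_(...) then updates the value the attention processors read, so the scale changes between runs without recompiling the pipeline.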

lijunliangTG enabled auto-merge (squash) on May 27, 2024 06:55.
zhangvia commented

> Example: using the diffusers feature to set the IP-Adapter scale at runtime.

Did you try different output resolutions when using the IP-Adapter? I think that will trigger a recompile.
