論文<br>
https://arxiv.org/abs/2305.10431<br>
<br>
GitHub<br>
https://github.com/mit-han-lab/fastcomposer<br>
<br>
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/FastComposer_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# setup environment

## git clone

In [None]:
# Clone the FastComposer repository under /content and switch into the checkout.
%cd /content
!git clone https://github.com/mit-han-lab/fastcomposer.git
%cd /content/fastcomposer
# Pin the working tree to a fixed commit (dated Jul 4, 2023) so that later
# upstream changes cannot alter what the rest of this notebook runs against.
!git checkout 106cf26158524c34d3d82ab0016da3b85c4e42c0

## install libraries

In [None]:
%cd /content/fastcomposer

!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118
!pip install transformers==4.25.1 accelerate datasets evaluate diffusers==0.16.1 xformers triton scipy clip

!python setup.py install

# download pretrained model

In [None]:
%cd /content/fastcomposer

# create dir
!mkdir -p model/fastcomposer

# download model
!wget -c https://huggingface.co/mit-han-lab/fastcomposer/resolve/main/pytorch_model.bin \
      -O model/fastcomposer/pytorch_model.bin

# set up input images

In [None]:
%cd /content/fastcomposer

target_a = 'man'
target_b = 'women'
rootdir = f'input_imgs/{target_a}_{target_b}'
dir_a = f'input_imgs/{target_a}_{target_b}/{target_a}'
dir_b = f'input_imgs/{target_a}_{target_b}/{target_b}'
filename_a = f'{dir_a}/0.jpg'
filename_b = f'{dir_b}/0.jpg'

!rm -rf {rootdir}
!mkdir -p {dir_a}
!mkdir -p {dir_b}


!wget -c https://cdn.shopify.com/s/files/1/0250/3976/5585/files/gandhi_1024x1024.jpg \
      -O {filename_a}

!wget -c https://providence-blue.com/wp-content/uploads/2018/08/florence-nightingale-facts-featured.jpg \
      -O {filename_b}

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Preview the two reference images side by side.
# Each panel is titled with the subject label its image was stored under
# (target_a / target_b), so the caption always matches the picture shown
# instead of a hard-coded name left over from another demo.
fig, axes = plt.subplots(1, 2, figsize=(15, 10))

for ax, label, path in zip(axes, (target_a, target_b), (filename_a, filename_b)):
    ax.set_title(label, fontsize=16)
    ax.axis('off')
    # Close the file handle once the pixels have been handed to matplotlib.
    with Image.open(path) as img:
        ax.imshow(img)

plt.show()

# Inference

In [None]:
%cd /content/fastcomposer

# Patch the cloned inference script in place (sed -i):
#   1. replace every match of `accelerator.device` with the literal "cuda"
#      (the "." in the pattern is an unescaped regex wildcard);
#   2. change map_location="cpu" to map_location="cuda".
# NOTE(review): presumably this makes the script place the model and checkpoint
# directly on the GPU — confirm against fastcomposer/inference.py at the pinned commit.
!sed -E "s/accelerator.device/\"cuda\"/g" -i /content/fastcomposer/fastcomposer/inference.py
!sed -E "s/map_location=\"cpu\"/map_location=\"cuda\"/g" -i /content/fastcomposer/fastcomposer/inference.py

In [None]:
# Write a basic accelerate configuration before calling `accelerate launch`.
# NOTE(review): presumably this avoids the interactive `accelerate config` step — confirm in the accelerate docs.
from accelerate.utils import write_basic_config
write_basic_config()

# Directory passed to the script as --output_dir. The path is relative, so this
# cell relies on the working directory still being /content/fastcomposer and on
# target_a, target_b and rootdir defined by the earlier image-setup cell.
output_dir = f'output_imgs/{target_a}_{target_b}'

# Run the (patched) FastComposer inference script on GPU 0 with fp16.
# The caption contains two <|image|> placeholders and --max_num_objects is 2;
# presumably each placeholder is bound to one reference subject folder under
# rootdir — TODO confirm the folder-to-placeholder ordering in inference.py.
# --num_images_per_prompt 5 matches the five output images shown afterwards.
!CUDA_VISIBLE_DEVICES=0 accelerate launch \
    --mixed_precision=fp16 \
    fastcomposer/inference.py \
    --pretrained_model_name_or_path "runwayml/stable-diffusion-v1-5" \
    --finetuned_model_path "model/fastcomposer" \
    --test_reference_folder {rootdir} \
    --test_caption "a man <|image|> and a beautiful woman <|image|> are cooking with wearing an apron, 4k, detail" \
    --output_dir {output_dir} \
    --mixed_precision fp16 \
    --image_encoder_type clip \
    --image_encoder_name_or_path "openai/clip-vit-large-patch14" \
    --num_image_tokens 1 \
    --max_num_objects 2 \
    --object_resolution 224 \
    --generate_height 512 \
    --generate_width 512 \
    --num_images_per_prompt 5 \
    --num_rows 1 \
    --seed 9258305 \
    --guidance_scale 4 \
    --inference_steps 50 \
    --start_merge_step 10 \
    --no_object_augmentation

In [None]:
# Show each of the five generated images (output_0.png … output_4.png) in its own figure.
for idx in range(5):
    result_path = f'{output_dir}/output_{idx}.png'
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.set_title('result', fontsize=16)
    ax.axis('off')
    ax.imshow(Image.open(result_path))
    plt.show()