Installing and importing dependencies

In [None]:
! pip install diffusers[training] accelerate transformers
! pip install diffusers["torch"]
! pip install git+https://github.com/huggingface/diffusers
! pip install torch-fidelity
! pip uninstall torch torchvision torchaudio
! pip install torch torchvision torchaudio
! pip install torchmetrics

In [None]:
# Create accelerate's default configuration ahead of the `accelerate launch`
# call in the training section (presumably without interactive prompts — confirm
# against the accelerate CLI docs).
! accelerate config default

In [None]:
import os
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from torchvision.transforms import functional as F
from torchmetrics.image.kid import KernelInceptionDistance
from diffusers import DiffusionPipeline, StableDiffusionPipeline
from diffusers.utils import make_image_grid

Training

In [None]:
! accelerate launch textual_inversion.py --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" --train_data_dir="/content/IshidaSui" --learnable_property="style" --placeholder_token="<sotonami>" --initializer_token="anime" --resolution=512 --train_batch_size=8 --gradient_accumulation_steps=1 --max_train_steps=10000 --learning_rate=5.0e-04 --scale_lr --lr_scheduler="constant" --lr_warmup_steps=0 --output_dir="/content/"

Tuning the number of training steps

In [None]:
# Sample the same prompt from every saved embedding checkpoint so the effect of
# the number of training steps can be compared side by side.
model_id = "runwayml/stable-diffusion-v1-5"
prompt = "A painting of Kaneki Ken in the style of <sotonami>"

images = []
for i in range(1, 21):
  # A fresh pipeline per checkpoint: each embedding file registers the same
  # <sotonami> token, which presumably cannot be loaded twice into one tokenizer.
  pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, use_safetensors=True).to("cuda")
  pipe.load_textual_inversion("learned_embeds-steps-" + str(i * 500) + ".safetensors")

  # The generators belong to the sampling call, not to from_pretrained (where they
  # are not a pipeline component and have no effect on sampling). They are rebuilt
  # for every checkpoint because a generator's state advances as it is used; this
  # way image k of every checkpoint starts from the same initial noise.
  generator = [torch.Generator(device="cuda").manual_seed(seed) for seed in range(4)]
  result = pipe(prompt, num_inference_steps=50, num_images_per_prompt=4, generator=generator)
  images.extend(result.images)

In [None]:
# First 32 images = checkpoints at steps 500-4000 (4 samples each, appended in
# checkpoint order), so each of the 8 grid rows shows one checkpoint.
make_image_grid(images[:32], rows=8, cols=4)

In [None]:
# Images 32-63 = checkpoints at steps 4500-8000, again one checkpoint per row.
make_image_grid(images[32:64], rows=8, cols=4)

In [None]:
# Remaining 16 images = checkpoints at steps 8500-10000, one checkpoint per row.
make_image_grid(images[64:], rows=4, cols=4)

Image generation script

In [None]:
# Generate 16 samples with the final embedding; `images` is reused by the KID
# section further down.
model_id = "runwayml/stable-diffusion-v1-5"
prompt = "A painting of Kaneki in the style of <sotonami>"

pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, use_safetensors=True).to("cuda")
# NOTE(review): no cell in this notebook writes sotonami.safetensors — presumably
# a renamed copy of one of the training checkpoints; confirm which one.
pipe.load_textual_inversion("sotonami.safetensors")

# Fixed seed so the displayed grid and the KID score computed from these images
# can be reproduced after a kernel restart.
sample_generator = torch.Generator(device="cuda").manual_seed(0)
images = pipe(prompt, num_inference_steps=50, num_images_per_prompt=16, generator=sample_generator).images
make_image_grid(images, rows=4, cols=4)

In [None]:
# Re-display the 4x4 grid of the 16 images produced by the generation cell above.
make_image_grid(images, rows=4, cols=4)

Computing KID

In [None]:
def preprocess_image(image):
    """Turn one channels-last pixel array into a batched, channels-first tensor.

    Args:
        image: Array-like of shape (H, W, C), as produced by ``np.array`` on a PIL image.

    Returns:
        Tensor of shape (1, C, H, W) holding the input values divided by 255.0.
    """
    # Indexing with None prepends the batch axis (same as unsqueeze(0)).
    batched = torch.tensor(image)[None, ...]
    channels_first = batched.permute(0, 3, 1, 2)
    return channels_first / 255.0

# Build the two image batches compared by KID: the reference ("real") images read
# from disk and the generated ("fake") images held in `images`.
transform = transforms.ToTensor()

# NOTE(review): training reads /content/IshidaSui but the reference set here is
# /content/Kamao — confirm this is the intended dataset.
dataset_path = "/content/Kamao"
# Keep regular files only: os.listdir also returns sub-directories (for example a
# notebook checkpoint folder), which Image.open cannot read.
image_paths = sorted(
    path
    for path in (os.path.join(dataset_path, name) for name in os.listdir(dataset_path))
    if os.path.isfile(path)
)
if not image_paths:
    # Fail with a clear message instead of the opaque error torch.cat gives on an empty list.
    raise FileNotFoundError(f"No files found in {dataset_path}")

real_batches = []
for path in image_paths:
    # The context manager closes each file handle once the pixels have been copied.
    with Image.open(path) as img:
        real_batches.append(preprocess_image(np.array(img.convert("RGB"))))
# torch.cat requires every image to have the same height and width.
# NOTE(review): assumes the reference images share one resolution — confirm, or resize first.
real_images = torch.cat(real_batches)

# ToTensor yields (C, H, W) floats in [0, 1]; unsqueeze adds the batch axis for concatenation.
fake_images = torch.cat([transform(image).unsqueeze(0) for image in images])

In [None]:
# Kernel Inception Distance between the reference and generated batches.
# normalize=True is used because both batches were scaled to [0, 1] above.
# NOTE(review): subset_size=8 presumably must not exceed the number of images on
# either side (16 generated images here) — confirm for the reference set.
kid = KernelInceptionDistance(normalize=True, subset_size=8)
kid.update(real_images, real=True)
kid.update(fake_images, real=False)

# compute() returns a (mean, std) pair taken over the sampled subsets; report the
# two values separately instead of printing the raw tuple of tensors.
kid_mean, kid_std = kid.compute()
print(f"KID: {kid_mean.item():.5f} ± {kid_std.item():.5f} (mean ± std over subsets)")