In [None]:
import torch
from diffusers import LCMScheduler, AutoPipelineForText2Image

# LCM-LoRA adapter applied on top of the DreamShaper 8 checkpoint.
# Adapter model card: https://huggingface.co/latent-consistency/lcm-lora-sdv1-5
model_id = "Lykon/dreamshaper-8"
adapter_id = "latent-consistency/lcm-lora-sdv1-5"

# Load the fp16 variant of the base weights.
pipe = AutoPipelineForText2Image.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    variant="fp16",
)
# Swap in the LCM scheduler, built from the config of the scheduler it replaces.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.to("mps")

# torch.compile is left disabled: not for mps.
# pipe = torch.compile(pipe)

# Load the LCM-LoRA weights, then fuse them into the pipeline.
pipe.load_lora_weights(adapter_id)
pipe.fuse_lora()

# First generation (512x512 image).
prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"

with torch.inference_mode():
    # Passing guidance_scale=0 disables guidance.
    image = pipe(
        prompt=prompt,
        num_inference_steps=4,
        guidance_scale=0,
    ).images[0]


In [None]:
%%time

# Measured on mps: 3.0s

# Prompts tried so far (uncomment one to switch):
# prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# prompt = "high quality picture, award winning landscape photography of Tokyo, Japan, 4k"
# prompt = "award winning photography of Tokyo, Japan, detailed, 8k, daytime"
# prompt = "high quality picture, award winning photography of Tokyo, Japan, detailed, 8k, daytime, aesthetic, magazine cover, 8k"
# prompt = "photography of Tokyo, Japan"
prompt = "photography of Jersey Island, United Kingdom"
negative_prompt = None

with torch.inference_mode():
    # Guidance-free 4-step variant (guidance_scale=0 disables guidance), kept for reference:
    # image = pipe(prompt=prompt, num_inference_steps=4, guidance_scale=0).images[0]
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=6,
        # guidance_scale should be 0, or within 1-2
        guidance_scale=2,
        # 512x512 takes about 3s; 256x256 takes about 0.9s
        width=512,
        height=512,
        # width=256, height=256,
    ).images[0]
    

In [None]:
# Show the current `image` as this cell's output.
image

In [None]:
import torch
from diffusers import StableDiffusionPipeline

# SDXS: works fast, but with low diversity.
# Project: https://github.com/IDKiro/sdxs
# Weights: https://huggingface.co/IDKiro/sdxs-512-0.9

# Weight precision (alternative: torch.float32).
weight_type = torch.float16

# Load the model. Alternative checkpoint: "IDKiro/sdxs-512-0.9".
pipe = StableDiffusionPipeline.from_pretrained("IDKiro/sdxs-512-dreamshaper", torch_dtype=weight_type)

# Optional: use the original VAE instead.
# pipe.vae = AutoencoderKL.from_pretrained("IDKiro/sdxs-512-0.9/vae_large")
pipe.to("mps")

# Turn the pipeline's progress bar off.
pipe.set_progress_bar_config(disable=True)

# First generation (512x512 image): a single inference step with guidance_scale=0.
prompt = "high quality picture, award winning photography of Saint Pierre and Miquelon, France, detailed, daytime, aesthetic, magazine cover, 8k"

with torch.inference_mode():
    image = pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0).images[0]

# Generator seeded with 42; the timed generation cell passes it as `generator=g`.
g = torch.Generator()
g.manual_seed(42)

In [None]:
%%time
# Candidate locations to render; `place` selects the one used for this run.
places = [
    "Jersey Island, United Kingdom",
    "Tokyo, Japan",
    "Kerguelen Islands, France",
    "Terre Adélie, Antarctica",
    "Clipperton Island, France",
    "Saint Pierre and Miquelon, France",
    "Saint Barthélemy, France",
    "Rome, Italy",
]
place = places[-1]  # "Rome, Italy" -- change the index to try another location

prompt = f"high quality picture, award winning photography of {place}, detailed, daytime, aesthetic, 8k"
# prompt = f"award winning photography of {place}"

# Re-seed the shared generator at the start of every run. Without this, each
# execution of the cell continues the RNG stream left by the previous one, so
# the image depends on how many times the cell has already been run.
# Change `seed` to sample a different image for the same prompt.
seed = 42
g.manual_seed(seed)

# with torch.inference_mode(mode=True):
image = pipe(
    prompt=prompt,
    # negative_prompt=negative_prompt,  # no effect
    num_inference_steps=1,
    guidance_scale=0,
    # guidance_scale=1,
    width=512, height=512, # 0.3s
    # width=256, height=256, # 0.17s ~100ms
    # clip_skip=3,
    generator=g,
).images[0]

In [None]:
# Show the current `image` as this cell's output.
image

Try CoreML-based approaches:

SDXS model already converted:
https://huggingface.co/lsb/6-bit-palettized-sdxs-512-dreamshaper/tree/main

Look at examples here:
https://huggingface.co/apple/coreml-stable-diffusion-2-1-base-palettized

Inference is done like this:
https://github.com/apple/ml-stable-diffusion/blob/main/python_coreml_stable_diffusion/pipeline.py

---

---

---

2024/10/21

TODO: try the MLX FLUX example: https://github.com/ml-explore/mlx-examples/tree/main/flux

In [2]:
# https://huggingface.co/madebyollin/taef1
# https://github.com/madebyollin/taesd

import torch
from diffusers import FluxPipeline, AutoencoderTiny

# FLUX.1-schnell pipeline, loaded in bfloat16.
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)

# Replace the pipeline's VAE with the tiny autoencoder from madebyollin/taef1.
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16)

# NOTE(review): the saved output of this cell reports that `accelerate` was not
# found; sequential CPU offload presumably depends on it -- confirm it is
# installed before re-running.
pipe.enable_sequential_cpu_offload()


Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.


model_index.json:   0%|          | 0.00/536 [00:00<?, ?B/s]

Fetching 23 files:   0%|          | 0/23 [00:00<?, ?it/s]

text_encoder/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

(…)t_encoder_2/model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

text_encoder_2/config.json:   0%|          | 0.00/782 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.53G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/274 [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

tokenizer_2/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

transformer/config.json:   0%|          | 0.00/321 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

(…)pytorch_model-00001-of-00003.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

(…)pytorch_model-00002-of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

(…)pytorch_model-00003-of-00003.safetensors:   0%|          | 0.00/3.87G [00:00<?, ?B/s]

(…)ion_pytorch_model.safetensors.index.json:   0%|          | 0.00/121k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/774 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]


KeyboardInterrupt



In [None]:
# Generate one image with 4 inference steps, guidance_scale=0.0 and max_sequence_length=256.
prompt = "slice of delicious New York-style berry cheesecake"
image = pipe(prompt, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256).images[0]

# Uncomment to write the result to disk:
# image.save("cheesecake.png")
image