In [1]:
import os
import torch
from diffusers import StableDiffusion3Pipeline
from typing import Optional

def load_pipeline_with_ti_from_local_folder_step(
    local_embeddings_folder: str,
    base_model_name_or_path: str,
    placeholder_token: str,
    training_step: int,
    torch_dtype: torch.dtype = torch.float16,
    device: Optional[str] = None,
) -> StableDiffusion3Pipeline:
    """
    Loads a Stable Diffusion 3 pipeline and injects Textual Inversion embeddings
    from a local folder, for a specific training step.

    Args:
        local_embeddings_folder (str): Path to the local folder containing
            step-specific embedding files like 'learned_embeds_t1-steps-XXXX.safetensors'.
        base_model_name_or_path (str): The identifier of the base SD3 model.
        placeholder_token (str): The placeholder token used during TI training.
        training_step (int): The specific training step for which to load embeddings.
        torch_dtype (torch.dtype): The torch dtype for loading the pipeline.
        device (Optional[str]): The device to move the pipeline to (e.g., "cuda").
            Defaults to "cuda" if available, else "cpu".

    Returns:
        StableDiffusion3Pipeline: The loaded pipeline with TI embeddings.

    Raises:
        FileNotFoundError: If any of the required embedding files are not found for the step.
        ValueError: If placeholder_token or training_step is not provided or invalid.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    if not placeholder_token:
        raise ValueError("placeholder_token must be provided.")
    if not isinstance(training_step, int) or training_step <= 0:
        raise ValueError("training_step must be a positive integer.")
    print(f"Using placeholder token: '{placeholder_token}' for training step: {training_step}")

    # Define the filename templates
    embedding_files_templates = [
        {"template": "learned_embeds_t1-steps-{}.safetensors", "encoder_attr": "text_encoder", "tokenizer_attr": "tokenizer"},
        {"template": "learned_embeds_t2-steps-{}.safetensors", "encoder_attr": "text_encoder_2", "tokenizer_attr": "tokenizer_2"},
        {"template": "learned_embeds_t3-steps-{}.safetensors", "encoder_attr": "text_encoder_3", "tokenizer_attr": "tokenizer_3"},
    ]

    # Load the base pipeline
    print(f"Loading base model '{base_model_name_or_path}'...")
    pipe = StableDiffusion3Pipeline.from_pretrained(
        base_model_name_or_path,
        torch_dtype=torch_dtype,
    )
    print("Base pipeline loaded.")

    # Load the textual inversion embeddings for each encoder
    for info in embedding_files_templates:
        filename = info["template"].format(training_step) # Format filename with the specific step
        file_path = os.path.join(local_embeddings_folder, filename)

        if not os.path.exists(file_path):
            raise FileNotFoundError(
                f"Embedding file '{filename}' not found in folder '{local_embeddings_folder}'. "
                f"Expected path: {file_path}"
            )

        text_encoder = getattr(pipe, info["encoder_attr"])
        tokenizer = getattr(pipe, info["tokenizer_attr"])

        print(f"Loading TI embeddings from '{file_path}' for {info['encoder_attr']}...")
        pipe.load_textual_inversion(
            file_path,
            token=placeholder_token,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
        )
        print(f"Successfully loaded TI for {info['encoder_attr']}.")

    print(f"All Textual Inversion embeddings for step {training_step} loaded from local folder.")
    
    pipe.to(device)
    print(f"Pipeline moved to {device}.")

    return pipe

Could not load bitsandbytes native library: /lib64/libc.so.6: version `GLIBC_2.34' not found (required by /mnt/lustre/work/oh/owl661/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so)
Traceback (most recent call last):
  File "/mnt/lustre/work/oh/owl661/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 85, in <module>
    lib = get_native_library()
  File "/mnt/lustre/work/oh/owl661/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 72, in get_native_library
    dll = ct.cdll.LoadLibrary(str(binary_path))
  File "/mnt/lustre/work/oh/owl661/lib/python3.10/ctypes/__init__.py", line 452, in LoadLibrary
    return self._dlltype(name)
  File "/mnt/lustre/work/oh/owl661/lib/python3.10/ctypes/__init__.py", line 374, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: /lib64/libc.so.6: version `GLIBC_2.34' not found (required by /mnt/lustre/work/oh/owl661/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so)

CUDA Setup 

ERROR: Please replace '<YOUR-TRAINING-PLACEHOLDER-TOKEN>' with the actual placeholder token used during training.


In [2]:
# --- Example Usage ---
# Path to the folder containing the step-specific embeddings
my_local_folder_with_embeddings = "/mnt/lustre/work/oh/owl661/compositional-vaes/sd3_whatsappA_embedding/wandb-a5a0j11k"
base_model = "stabilityai/stable-diffusion-3-medium-diffusers" # Or your specific SD3 base model

# !!! IMPORTANT: You NEED to provide the placeholder token that was used during training !!!
# This information should be known from your training setup.
# It's often logged in the W&B run's config or in your training script's arguments.
my_placeholder_token = "<WhatsApp>" # REPLACE THIS! For example: "<whatsappA-style>"

chosen_training_step = 1500

if my_placeholder_token == "<YOUR-TRAINING-PLACEHOLDER-TOKEN>":
    print("ERROR: Please replace '<YOUR-TRAINING-PLACEHOLDER-TOKEN>' with the actual placeholder token used during training.")
else:
    print(f"Attempting to load TI embeddings for step {chosen_training_step} from local folder: {my_local_folder_with_embeddings}")
    try:
        pipeline = load_pipeline_with_ti_from_local_folder_step(
            local_embeddings_folder=my_local_folder_with_embeddings,
            base_model_name_or_path=base_model,
            placeholder_token=my_placeholder_token,
            training_step=chosen_training_step,
            torch_dtype=torch.float16 # Use bfloat16 if your hardware supports it and you trained with it
        )
        print(f"Pipeline with Textual Inversion embeddings for step {chosen_training_step} loaded successfully from local files!")

        # Example prompt using the placeholder token
        prompt_template = "A photo of a cat in the style of {}"
        prompt = prompt_template.format(my_placeholder_token)
        print(f"Generating image with prompt: '{prompt}'")

        # Generate an image
        image = pipeline(prompt, num_inference_steps=28, guidance_scale=7.0).images[0]
        output_filename = f"ti_step{chosen_training_step}_loaded_from_local.png"
        image.save(output_filename)
        print(f"Image saved to {output_filename}")

    except FileNotFoundError as fnfe:
        print(f"File error: {fnfe}")
    except ValueError as ve:
        print(f"Configuration error: {ve}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()

Attempting to load TI embeddings for step 1500 from local folder: /mnt/lustre/work/oh/owl661/compositional-vaes/sd3_whatsappA_embedding/wandb-a5a0j11k
Using device: cuda
Using placeholder token: '<WhatsApp>' for training step: 1500
Loading base model 'stabilityai/stable-diffusion-3-medium-diffusers'...


Loading pipeline components...:   0%|          | 0/9 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


Base pipeline loaded.
Loading TI embeddings from '/mnt/lustre/work/oh/owl661/compositional-vaes/sd3_whatsappA_embedding/wandb-a5a0j11k/learned_embeds_t1-steps-1500.safetensors' for text_encoder...
An unexpected error occurred: 'StableDiffusion3Pipeline' object has no attribute 'load_textual_inversion'


Traceback (most recent call last):
  File "/tmp/ipykernel_3568093/605165500.py", line 18, in <module>
    pipeline = load_pipeline_with_ti_from_local_folder_step(
  File "/tmp/ipykernel_3568093/3024115148.py", line 75, in load_pipeline_with_ti_from_local_folder_step
    pipe.load_textual_inversion(
  File "/mnt/lustre/work/oh/owl661/lib/python3.10/site-packages/diffusers/configuration_utils.py", line 144, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'StableDiffusion3Pipeline' object has no attribute 'load_textual_inversion'


In [4]:
import torch
from diffusers import StableDiffusion3Pipeline

# pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo", torch_dtype=torch.bfloat16)
pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

Loading pipeline components...:   0%|          | 0/9 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [29]:
init_latent = torch.randn(1, 16, 128, 128, device="cuda")

In [64]:
with torch.no_grad():
    image = pipe(
        "a dog riding a horse in Tubingen",
        num_inference_steps=10,
        guidance_scale=0.0,
        latents=init_latent,
    ).images[0]
image.save("capybara.png")

  0%|          | 0/10 [00:00<?, ?it/s]

In [71]:
image = pipe(
    "a dog riding a horse in Tubingen",
    num_inference_steps=4,
    guidance_scale=1.0,
    latents=init_latent,
).images[0]
image.save("capybara2.png")

  0%|          | 0/4 [00:00<?, ?it/s]