# DreamCatalyst Diffusion Optimization Notebook
This notebook interactively runs SDS/DDS/DWT-DDS image optimization (`run2d.image_optimization`) on a single input image, using a Stable Diffusion pipeline loaded with `diffusers`.

In [1]:
# If needed, install the dependencies (%pip installs into the environment of the running kernel)
# %pip install diffusers pytorch_wavelets transformers accelerate


In [2]:
import os
import torch
import numpy as np
from PIL import Image
from diffusers import StableDiffusionPipeline
from dc.utils_ import load_512
from run2d import image_optimization  # Ensure run2d.py is in the same directory or module path


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [3]:
# Run configuration: edit the values in this cell, then re-run the cells below.

# -- Input image and prompts --
image_path = "trial.jpg"  # <-- point this at the image you want to edit
source_prompt, target_prompt = (
    "a photo of two reflective spheres.",
    "a photo of two reflective red spheres.",
)

# -- Optimization --
num_iters, guidance_scale = 300, 7.5
use_dds = dwt_dds = True  # with both enabled the run log reports the method as "DDS+DWT"

# -- Wavelet --
wavelet, J = "db8", 1  # the run log reports J as the number of decomposition levels
# With both keep_* flags off the run log reports "Frequency preservation: None".
keep_low = keep_high = disable_wavelet = False

# -- Visualization, saving, profiling --
vis_method, vis_interval = "linear", 10  # visualization happens every `vis_interval` iterations
save_interval = 50  # presumably also counted in iterations -- TODO confirm in run2d
profiling_enabled = False
results_dir = None  # None -> the output directory is auto-named


In [4]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the pretrained Stable Diffusion pipeline (downloaded on first use), then move it to `device`.
model_id = "runwayml/stable-diffusion-v1-5"
pipeline = StableDiffusionPipeline.from_pretrained(model_id)
pipeline = pipeline.to(device)

# Read the input image configured in the parameters cell.
# NOTE(review): load_512 presumably returns the image resized/cropped to 512x512 -- confirm in dc.utils_.
image = load_512(image_path)


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

safety_checker/config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [5]:
# Launch the optimization with the settings from the configuration cell.
# All arguments are passed by keyword, so grouping them by purpose does not change the call.
image_optimization(
    # model and inputs
    pipe=pipeline,
    image=image,
    image_path=image_path,
    text_source=source_prompt,
    text_target=target_prompt,
    # optimization
    num_iters=num_iters,
    gs=guidance_scale,
    seed=42,  # fixed seed, presumably for repeatable runs
    use_dds=use_dds,
    dwt_dds=dwt_dds,
    # wavelet options
    wavelet=wavelet,
    J=J,
    keep_low=keep_low,
    keep_high=keep_high,
    disable_wavelet=disable_wavelet,
    # output, visualization and profiling
    results_dir=results_dir,
    vis_method=vis_method,
    vis_interval=vis_interval,
    save_interval=save_interval,
    profiling_enabled=profiling_enabled,
)


  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


==> use dds: True
==> source prompt: a photo of two reflective spheres., target prompt: a photo of two reflective red spheres.
==> using wavelet db8 with J=1
==> Visualization method: linear, interval: 10

IMAGE OPTIMIZATION SETTINGS
Source image: trial.jpg
Source prompt: "a photo of two reflective spheres."
Target prompt: "a photo of two reflective red spheres."
Method: DDS+DWT
Guidance scale: 7.5
Iterations: 300
Wavelet type: db8
Decomposition levels (J): 1
Frequency preservation: None
Output directory: output/trial_a_photo_of_two_reflective_red__20250422_140617
Visualization: linear (every 10 iterations)



Optimizing: 100%|█████████████| 300/300 [00:25<00:00, 11.72iter/s, loss=0.00878]
