From 99616b935da65ad16c795e1d4a674e498b14c10f Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 12 Sep 2025 12:13:30 -0300 Subject: [PATCH 1/4] avoid crash with invalid tile sizes, use 0 for default --- stable-diffusion.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 07f45510f..bc5575f32 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -108,7 +108,7 @@ class StableDiffusionGGML { std::string taesd_path; bool use_tiny_autoencoder = false; - sd_tiling_params_t vae_tiling_params = {false, 32, 32, 0.5f, false, 0, 0}; + sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, false, 0, 0}; bool offload_params_to_cpu = false; bool stacked_id = false; @@ -1339,8 +1339,12 @@ class StableDiffusionGGML { // TODO: args instead of env for tile size / overlap? if (!use_tiny_autoencoder) { float tile_overlap = vae_tiling_params.target_overlap; - int tile_size_x = vae_tiling_params.tile_size_x; - int tile_size_y = vae_tiling_params.tile_size_y; + int tile_size_x = (vae_tiling_params.tile_size_x >= 4) + ? vae_tiling_params.tile_size_x + : 32; + int tile_size_y = (vae_tiling_params.tile_size_y >= 4) + ? vae_tiling_params.tile_size_y + : 32; if (vae_tiling_params.relative) { get_relative_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params.rel_size_x, vae_tiling_params.rel_size_y, W, H); @@ -1490,8 +1494,12 @@ class StableDiffusionGGML { int64_t t0 = ggml_time_ms(); if (!use_tiny_autoencoder) { float tile_overlap = vae_tiling_params.target_overlap; - int tile_size_x = vae_tiling_params.tile_size_x; - int tile_size_y = vae_tiling_params.tile_size_y; + int tile_size_x = (vae_tiling_params.tile_size_x >= 4) + ? vae_tiling_params.tile_size_x + : 32; + int tile_size_y = (vae_tiling_params.tile_size_y >= 4) + ? vae_tiling_params.tile_size_y + : 32; if (vae_tiling_params.relative) { get_relative_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params.rel_size_x, vae_tiling_params.rel_size_y, x->ne[0], x->ne[1]); @@ -1769,7 +1777,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) { sd_img_gen_params->control_strength = 0.9f; sd_img_gen_params->style_strength = 20.f; sd_img_gen_params->normalize_input = false; - sd_img_gen_params->vae_tiling_params = {false, 32, 32, 0.5f, false, 0.0f, 0.0f}; + sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, false, 0.0f, 0.0f}; } char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { From 570e26ab8d1cc6a90e0b0eaf6cfd419bb14aa7c0 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 12 Sep 2025 18:23:25 -0300 Subject: [PATCH 2/4] refactor default tile size, limit overlap factor --- stable-diffusion.cpp | 66 ++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index bc5575f32..531fec639 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1300,25 +1300,28 @@ class StableDiffusionGGML { return latent; } - void get_relative_tile_sizes(int& tile_size_x, int& tile_size_y, float tile_overlap, float rel_size_x, float rel_size_y, int latent_x, int latent_y) { - // format is AxB, or just A (equivalent to AxA) - // A and B can be integers (tile size) or floating point - // floating point <= 1 means simple fraction of the latent dimension - // floating point > 1 means number of tiles across that dimension - // a single number gets applied to both - auto get_tile_factor = [tile_overlap](float factor) { - if (factor > 1.0) - factor = 1 / (factor - factor * tile_overlap + tile_overlap); - return factor; - }; - const int min_tile_dimension = 4; - int tmp_x = tile_size_x, tmp_y = tile_size_y; - tmp_x = std::round(latent_x * get_tile_factor(rel_size_x)); - tmp_y = std::round(latent_y * get_tile_factor(rel_size_y)); + void get_tile_sizes(int& tile_size_x, int& tile_size_y, float& tile_overlap, const sd_tiling_params_t & params, int latent_x, int latent_y) { + tile_overlap = std::max(std::min(params.target_overlap, 0.5f), 0.0f); + auto get_tile_size = [&](int requested_size, float factor, int latent_size) { + const int default_tile_size = 32; + const int min_tile_dimension = 4; + int tile_size = default_tile_size; + // rel_size <= 1 means simple fraction of the latent dimension + // rel_size > 1 means number of tiles across that dimension + if (params.relative) { + if (factor > 1.0) + factor = 1 / (factor - factor * tile_overlap + tile_overlap); + tile_size = std::round(latent_size * factor); + } + else if (requested_size >= min_tile_dimension) { + tile_size = requested_size; + } + return std::max(std::min(tile_size, latent_size), min_tile_dimension); + }; - tile_size_x = std::max(std::min(tmp_x, latent_x), min_tile_dimension); - tile_size_y = std::max(std::min(tmp_y, latent_y), min_tile_dimension); + tile_size_x = get_tile_size(params.tile_size_x, params.rel_size_x, latent_x); + tile_size_y = get_tile_size(params.tile_size_y, params.rel_size_y, latent_y); LOG_INFO("VAE Tile size: %dx%d", tile_size_x, tile_size_y); } @@ -1336,19 +1339,11 @@ class StableDiffusionGGML { } result = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, x->ne[3]); } - // TODO: args instead of env for tile size / overlap? - if (!use_tiny_autoencoder) { - float tile_overlap = vae_tiling_params.target_overlap; - int tile_size_x = (vae_tiling_params.tile_size_x >= 4) - ? vae_tiling_params.tile_size_x - : 32; - int tile_size_y = (vae_tiling_params.tile_size_y >= 4) - ? vae_tiling_params.tile_size_y - : 32; - if (vae_tiling_params.relative) { - get_relative_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params.rel_size_x, vae_tiling_params.rel_size_y, W, H); - } + if (!use_tiny_autoencoder) { + float tile_overlap; + int tile_size_x, tile_size_y; + get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H); // TODO: also use an arg for this one? // multiply tile size for encode to keep the compute buffer size consistent @@ -1493,17 +1488,10 @@ class StableDiffusionGGML { } int64_t t0 = ggml_time_ms(); if (!use_tiny_autoencoder) { - float tile_overlap = vae_tiling_params.target_overlap; - int tile_size_x = (vae_tiling_params.tile_size_x >= 4) - ? vae_tiling_params.tile_size_x - : 32; - int tile_size_y = (vae_tiling_params.tile_size_y >= 4) - ? vae_tiling_params.tile_size_y - : 32; + float tile_overlap; + int tile_size_x, tile_size_y; + get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H); - if (vae_tiling_params.relative) { - get_relative_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params.rel_size_x, vae_tiling_params.rel_size_y, x->ne[0], x->ne[1]); - } LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y); process_latent_out(x); From 06b4130509d3bf090a748234c6e0ce0eb3db8809 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 12 Sep 2025 18:30:46 -0300 Subject: [PATCH 3/4] remove explicit parameter for relative tile size --- examples/cli/main.cpp | 4 +--- stable-diffusion.cpp | 10 +++++----- stable-diffusion.h | 1 - 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index f255c85c9..aff822907 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -118,7 +118,7 @@ struct SDParams { int chroma_t5_mask_pad = 1; float flow_shift = INFINITY; - sd_tiling_params_t vae_tiling_params = {false, 32, 32, 0.5f, false, 0.0f, 0.0f}; + sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; SDParams() { sd_sample_params_init(&sample_params); @@ -749,7 +749,6 @@ void parse_args(int argc, const char** argv, SDParams& params) { } catch (const std::out_of_range& e) { return -1; } - params.vae_tiling_params.relative = false; return 1; }; @@ -773,7 +772,6 @@ void parse_args(int argc, const char** argv, SDParams& params) { } catch (const std::out_of_range& e) { return -1; } - params.vae_tiling_params.relative = true; return 1; }; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 531fec639..acc9deeca 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -108,7 +108,7 @@ class StableDiffusionGGML { std::string taesd_path; bool use_tiny_autoencoder = false; - sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, false, 0, 0}; + sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0, 0}; bool offload_params_to_cpu = false; bool stacked_id = false; @@ -1309,7 +1309,7 @@ class StableDiffusionGGML { int tile_size = default_tile_size; // rel_size <= 1 means simple fraction of the latent dimension // rel_size > 1 means number of tiles across that dimension - if (params.relative) { + if (factor > 0.f) { if (factor > 1.0) factor = 1 / (factor - factor * tile_overlap + tile_overlap); tile_size = std::round(latent_size * factor); @@ -1322,8 +1322,6 @@ class StableDiffusionGGML { tile_size_x = get_tile_size(params.tile_size_x, params.rel_size_x, latent_x); tile_size_y = get_tile_size(params.tile_size_y, params.rel_size_y, latent_y); - - LOG_INFO("VAE Tile size: %dx%d", tile_size_x, tile_size_y); } ggml_tensor* encode_first_stage(ggml_context* work_ctx, ggml_tensor* x, bool decode_video = false) { @@ -1350,6 +1348,8 @@ class StableDiffusionGGML { tile_size_x *= 1.30539; tile_size_y *= 1.30539; + LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y); + process_vae_input_tensor(x); if (vae_tiling_params.enabled && !decode_video) { auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { @@ -1765,7 +1765,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) { sd_img_gen_params->control_strength = 0.9f; sd_img_gen_params->style_strength = 20.f; sd_img_gen_params->normalize_input = false; - sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, false, 0.0f, 0.0f}; + sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; } char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { diff --git a/stable-diffusion.h b/stable-diffusion.h index 0e37970b3..682777410 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -118,7 +118,6 @@ typedef struct { int tile_size_x; int tile_size_y; float target_overlap; - bool relative; float rel_size_x; float rel_size_y; } sd_tiling_params_t; From 0e56bc7e566df0600907e9a4fbadd25bfc7b5b1f Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 12 Sep 2025 18:49:54 -0300 Subject: [PATCH 4/4] limit encoding tile to latent size --- stable-diffusion.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index acc9deeca..9085bd37c 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1301,7 +1301,8 @@ class StableDiffusionGGML { } - void get_tile_sizes(int& tile_size_x, int& tile_size_y, float& tile_overlap, const sd_tiling_params_t & params, int latent_x, int latent_y) { + void get_tile_sizes(int& tile_size_x, int& tile_size_y, float& tile_overlap, const sd_tiling_params_t & params, + int latent_x, int latent_y, float encoding_factor = 1.0f) { tile_overlap = std::max(std::min(params.target_overlap, 0.5f), 0.0f); auto get_tile_size = [&](int requested_size, float factor, int latent_size) { const int default_tile_size = 32; @@ -1317,6 +1318,7 @@ class StableDiffusionGGML { else if (requested_size >= min_tile_dimension) { tile_size = requested_size; } + tile_size *= encoding_factor; return std::max(std::min(tile_size, latent_size), min_tile_dimension); }; @@ -1341,12 +1343,8 @@ class StableDiffusionGGML { if (!use_tiny_autoencoder) { float tile_overlap; int tile_size_x, tile_size_y; - get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H); - - // TODO: also use an arg for this one? // multiply tile size for encode to keep the compute buffer size consistent - tile_size_x *= 1.30539; - tile_size_y *= 1.30539; + get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H, 1.30539); LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);