From be107efd2846735887592d384a075d8f9d8da3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Thu, 30 Oct 2025 23:04:32 +0100 Subject: [PATCH 1/5] Fix inpainting masked image being broken by side effect --- stable-diffusion.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 9552cfc4..ced2513f 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2532,8 +2532,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g sd_image_to_ggml_tensor(sd_img_gen_params->mask_image, mask_img); sd_image_to_ggml_tensor(sd_img_gen_params->init_image, init_img); - init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); - if (sd_version_is_inpaint(sd_ctx->sd->version)) { int64_t mask_channels = 1; if (sd_ctx->sd->version == VERSION_FLUX_FILL) { @@ -2548,8 +2546,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g ggml_tensor* masked_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); ggml_ext_tensor_apply_mask(init_img, mask_img, masked_img); masked_latent = sd_ctx->sd->encode_first_stage(work_ctx, masked_img); + init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); } else { // mask after vae + init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); masked_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, init_latent->ne[0], init_latent->ne[1], init_latent->ne[2], 1); ggml_ext_tensor_apply_mask(init_latent, mask_img, masked_latent, 0.); } @@ -2593,6 +2593,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g } } } + } else { + init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); } { From 2350bd7c7bf4cec762516713eb8756e2976d2195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 31 Oct 2025 11:58:57 +0100 Subject: [PATCH 2/5] Fix unet inpainting concat not being set --- stable-diffusion.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index ced2513f..2c194126 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2590,6 +2590,13 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g for (int k = 0; k < masked_latent->ne[2]; k++) { ggml_ext_tensor_set_f32(concat_latent, 0, ix, iy, masked_latent->ne[2] + 1 + k); } + } else { + float m = ggml_ext_tensor_get_f32(mask_img, mx, my); + ggml_ext_tensor_set_f32(concat_latent, m, ix, iy, 0); + for (int k = 0; k < masked_latent->ne[2];k++) { + float v = ggml_ext_tensor_get_f32(masked_latent, ix, iy, k); + ggml_ext_tensor_set_f32(concat_latent, v, ix, iy, k + mask_channels); + } } } } From af5bed2dcf63d102c1acfe35a5af490a61f09e54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 31 Oct 2025 12:24:05 +0100 Subject: [PATCH 3/5] Fix Flex.2 inpaint mode crash (+ use scale factor) --- stable-diffusion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 2c194126..bc1342a8 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2535,9 +2535,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (sd_version_is_inpaint(sd_ctx->sd->version)) { int64_t mask_channels = 1; if (sd_ctx->sd->version == VERSION_FLUX_FILL) { - mask_channels = 8 * 8; // flatten the whole mask + mask_channels = sd_ctx->sd->get_vae_scale_factor() * sd_ctx->sd->get_vae_scale_factor(); // flatten the whole mask } else if (sd_ctx->sd->version == VERSION_FLEX_2) { - mask_channels = 1 + init_latent->ne[2]; + mask_channels = 1 + sd_ctx->sd->get_latent_channel(); } ggml_tensor* masked_latent = nullptr; From f9eed2165600969344f384e7c56ddb92fc827489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 31 Oct 2025 12:46:39 +0100 Subject: [PATCH 4/5] cleanup --- stable-diffusion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index bc1342a8..bbaf1aff 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2535,7 +2535,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (sd_version_is_inpaint(sd_ctx->sd->version)) { int64_t mask_channels = 1; if (sd_ctx->sd->version == VERSION_FLUX_FILL) { - mask_channels = sd_ctx->sd->get_vae_scale_factor() * sd_ctx->sd->get_vae_scale_factor(); // flatten the whole mask + mask_channels = vae_scale_factor * vae_scale_factor; // flatten the whole mask } else if (sd_ctx->sd->version == VERSION_FLEX_2) { mask_channels = 1 + sd_ctx->sd->get_latent_channel(); } From 6c6faae5e839356f36572e9979c7aafe6b153e2c Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 2 Nov 2025 02:02:17 +0800 Subject: [PATCH 5/5] format code --- stable-diffusion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index bbaf1aff..9e38ae6e 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2593,7 +2593,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g } else { float m = ggml_ext_tensor_get_f32(mask_img, mx, my); ggml_ext_tensor_set_f32(concat_latent, m, ix, iy, 0); - for (int k = 0; k < masked_latent->ne[2];k++) { + for (int k = 0; k < masked_latent->ne[2]; k++) { float v = ggml_ext_tensor_get_f32(masked_latent, ix, iy, k); ggml_ext_tensor_set_f32(concat_latent, v, ix, iy, k + mask_channels); }