From 5ff5b89d5d82b0c5db0f27af310ac3b3baac61f7 Mon Sep 17 00:00:00 2001 From: akleine Date: Tue, 11 Nov 2025 18:48:50 +0100 Subject: [PATCH 1/2] feat: make PhotoMakerV2 more robust by image count check --- stable-diffusion.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 4cea83a19..2ca46f09f 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2454,18 +2454,24 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, LOG_WARN("Turn off PhotoMaker"); sd_ctx->sd->stacked_id = false; } else { - id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask); - int64_t t1 = ggml_time_ms(); - LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0); - if (sd_ctx->sd->free_params_immediately) { - sd_ctx->sd->pmid_model->free_params_buffer(); - } - // Encode input prompt without the trigger word for delayed conditioning - prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt); - // printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str()); - prompt = prompt_text_only; // - if (sample_steps < 50) { - LOG_WARN("It's recommended to use >= 50 steps for photo maker!"); + if (pm_params.id_images_count != id_embeds->ne[1]) { + LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.",pm_params.id_images_count,id_embeds->ne[1]); + LOG_WARN("Turn off PhotoMaker"); + sd_ctx->sd->stacked_id = false; + } else { + id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask); + int64_t t1 = ggml_time_ms(); + LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->pmid_model->free_params_buffer(); + } + // Encode input prompt without the trigger word for delayed conditioning + prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt); + // printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str()); + prompt = prompt_text_only; // + if (sample_steps < 50) { + LOG_WARN("It's recommended to use >= 50 steps for photo maker!"); + } } } } else { From d9300a782116e95379a1b40d0f4bfdeb8bb9d050 Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 16 Nov 2025 17:10:01 +0800 Subject: [PATCH 2/2] format code --- stable-diffusion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 2ca46f09f..a58d10b23 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2454,10 +2454,10 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, LOG_WARN("Turn off PhotoMaker"); sd_ctx->sd->stacked_id = false; } else { - if (pm_params.id_images_count != id_embeds->ne[1]) { - LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.",pm_params.id_images_count,id_embeds->ne[1]); - LOG_WARN("Turn off PhotoMaker"); - sd_ctx->sd->stacked_id = false; + if (pm_params.id_images_count != id_embeds->ne[1]) { + LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.", pm_params.id_images_count, id_embeds->ne[1]); + LOG_WARN("Turn off PhotoMaker"); + sd_ctx->sd->stacked_id = false; } else { id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask); int64_t t1 = ggml_time_ms();