From 77c679f6d62d4ce3406d8bee1b025dbd1e8b213e Mon Sep 17 00:00:00 2001
From: Wagner Bruna
Date: Sun, 5 Oct 2025 16:32:08 -0300
Subject: [PATCH 1/2] add support for Qwen Image Pruning

---
 qwen_image.hpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/qwen_image.hpp b/qwen_image.hpp
index 2f5dad89..0289394d 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -492,6 +492,22 @@ namespace Qwen {
                         bool flash_attn = false)
             : GGMLRunner(backend, offload_params_to_cpu) {
             qwen_image_params.flash_attn = flash_attn;
+
+            int model_layers = 60;
+            int num_layers = 1;
+            for (int layer = model_layers; layer > num_layers; layer--) {
+                for (auto pair : tensor_types) {
+                    if (pair.first.find("model.diffusion_model.transformer_blocks." + std::to_string(layer-1) + ".attn.add_k_proj.bias") != std::string::npos) {
+                        num_layers = layer;
+                        break;
+                    }
+                }
+            }
+            if (num_layers < model_layers) {
+                LOG_INFO("Qwen Image: some layers missing, assuming pruned model");
+            }
+
+            qwen_image_params.num_layers = num_layers;
             qwen_image = QwenImageModel(qwen_image_params);
             qwen_image.init(params_ctx, tensor_types, prefix);
         }

From b9d7b2b51f51c718399a6bcb93e6cfc082e963b9 Mon Sep 17 00:00:00 2001
From: Wagner Bruna
Date: Thu, 9 Oct 2025 08:11:39 -0300
Subject: [PATCH 2/2] follow the prefix parameter and avoid hardcoded max number

---
 qwen_image.hpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/qwen_image.hpp b/qwen_image.hpp
index 0289394d..0f84ebea 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -493,11 +493,16 @@ namespace Qwen {
             : GGMLRunner(backend, offload_params_to_cpu) {
             qwen_image_params.flash_attn = flash_attn;
 
-            int model_layers = 60;
-            int num_layers = 1;
+            int model_layers = qwen_image_params.num_layers;
+            int num_layers   = 1;
+            std::string layer_prefix = "transformer_blocks.";
+            if (prefix.size() > 0) {
+                layer_prefix = prefix + "." + layer_prefix;
+            }
             for (int layer = model_layers; layer > num_layers; layer--) {
+                std::string layer_name = layer_prefix + std::to_string(layer-1) + ".attn.add_k_proj.bias";
                 for (auto pair : tensor_types) {
-                    if (pair.first.find("model.diffusion_model.transformer_blocks." + std::to_string(layer-1) + ".attn.add_k_proj.bias") != std::string::npos) {
+                    if (pair.first.find(layer_name) != std::string::npos) {
                         num_layers = layer;
                         break;
                     }
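
Both patches build on the same idea: rather than trusting the configured layer
count, scan the checkpoint's tensor names from the highest block index downwards
and keep the first index whose attn.add_k_proj.bias tensor is actually present.
Below is a minimal standalone sketch of that detection loop, assuming a plain
std::map<std::string, int> in place of the project's String2GGMLType; the
detect_num_layers function and the sample tensor names are hypothetical,
for illustration only.

#include <cstdio>
#include <map>
#include <string>

static int detect_num_layers(const std::map<std::string, int>& tensor_types,
                             const std::string& prefix,
                             int max_layers) {
    std::string layer_prefix = "transformer_blocks.";
    if (prefix.size() > 0) {
        layer_prefix = prefix + "." + layer_prefix;
    }
    int num_layers = 1;
    // Count down from the architectural maximum; once a block's key tensor is
    // found, num_layers catches up to layer and the loop condition ends the scan.
    for (int layer = max_layers; layer > num_layers; layer--) {
        std::string layer_name = layer_prefix + std::to_string(layer - 1) + ".attn.add_k_proj.bias";
        for (const auto& pair : tensor_types) {
            if (pair.first.find(layer_name) != std::string::npos) {
                num_layers = layer;
                break;
            }
        }
    }
    return num_layers;
}

int main() {
    // Hypothetical pruned checkpoint: only blocks 0..39 of the original 60 remain.
    std::map<std::string, int> tensor_types;
    for (int i = 0; i < 40; i++) {
        tensor_types["model.diffusion_model.transformer_blocks." + std::to_string(i) + ".attn.add_k_proj.bias"] = 0;
    }
    int num_layers = detect_num_layers(tensor_types, "model.diffusion_model", 60);
    std::printf("detected %d of 60 layers\n", num_layers); // detected 40 of 60 layers
    return 0;
}

Counting down keeps the unpruned case cheap: the very first probe (block 59)
matches and the scan ends immediately, so the full sweep is only paid when
layers really are missing.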