diff --git a/common.hpp b/common.hpp
index dd8281f9e..33d499fb1 100644
--- a/common.hpp
+++ b/common.hpp
@@ -242,14 +242,18 @@ class FeedForward : public GGMLBlock {
         }
 
         // net_1 is nn.Dropout(), skip for inference
-        float scale = 1.f;
+        bool force_prec_f32 = false;
+        float scale         = 1.f;
         if (precision_fix) {
             scale = 1.f / 128.f;
+#ifdef SD_USE_VULKAN
+            force_prec_f32 = true;
+#endif
         }
         // The purpose of the scale here is to prevent NaN issues in certain situations.
         // For example, when using Vulkan without enabling force_prec_f32,
         // or when using CUDA but the weights are k-quants.
-        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, false, scale));
+        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32, scale));
     }
 
     struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
diff --git a/qwen_image.hpp b/qwen_image.hpp
index 94ada47d7..3e4a75e07 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -94,10 +94,14 @@ namespace Qwen {
             blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
             blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
 
-            float scale = 1.f / 32.f;
+            float scale         = 1.f / 32.f;
+            bool force_prec_f32 = false;
+#ifdef SD_USE_VULKAN
+            force_prec_f32 = true;
+#endif
             // The purpose of the scale here is to prevent NaN issues in certain situations.
             // For example when using CUDA but the weights are k-quants (not all prompts).
-            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
+            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale));
             // to_out.1 is nn.Dropout
             blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
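
Why the scale helps: with f16 (or k-quant) weights, the per-row dot products inside a large matmul can exceed the f16 range (about 65504) and overflow to inf/NaN on backends that accumulate in f16, such as Vulkan without force_prec_f32. A minimal sketch of the technique, assuming a ggml-style forward pass (scaled_linear is a hypothetical helper for illustration, not the actual Linear::forward in this repo; where exactly the scale is applied may differ):

    // Hypothetical sketch: pre-scale activations, matmul, then undo the scale.
    static struct ggml_tensor* scaled_linear(struct ggml_context* ctx,
                                             struct ggml_tensor* w,  // f16/quantized weight
                                             struct ggml_tensor* x,  // input activations
                                             float scale) {          // e.g. 1.f / 128.f
        // Shrink the activations first so the dot products in the matmul
        // stay well inside f16 range on backends that accumulate in f16.
        x = ggml_scale(ctx, x, scale);
        x = ggml_mul_mat(ctx, w, x);
        // Undo the scale on the (f32) output; the result matches the
        // unscaled computation up to rounding.
        x = ggml_scale(ctx, x, 1.0f / scale);
        return x;
    }

By linearity, W(s*x) * (1/s) == W*x, so only the intermediate magnitudes change, not the output. force_prec_f32 sidesteps the overflow entirely by running the matmul in f32, which is why the patch enables it unconditionally under SD_USE_VULKAN while keeping the cheaper scale workaround for other backends.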