From 7388cfb3aa945f37350df063950291141e369343 Mon Sep 17 00:00:00 2001
From: Wagner Bruna <wbruna@users.noreply.github.com>
Date: Sat, 27 Sep 2025 18:50:54 -0300
Subject: [PATCH 1/3] fix: corrupted canny image memory buffer

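The sd_image_t struct is passed to preprocess_canny by value, so
assigning a new buffer to img.data only changes the local copy: the
caller is left pointing at the freed buffer and the new one is leaked.
Instead, write the result directly into the existing image data buffer.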
---
 ggml_extend.hpp   | 6 ++++--
 preprocessing.hpp | 4 +---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index a125357b4..e01d41bfe 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -372,12 +372,14 @@ __STATIC_INLINE__ float sigmoid(float x) {
 
 // SPECIAL OPERATIONS WITH TENSORS
 
-__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) {
+__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input, uint8_t* image_data = nullptr) {
     int64_t width    = input->ne[0];
     int64_t height   = input->ne[1];
     int64_t channels = input->ne[2];
     GGML_ASSERT(channels == 3 && input->type == GGML_TYPE_F32);
-    uint8_t* image_data = (uint8_t*)malloc(width * height * channels);
+    if (image_data == nullptr) {
+        image_data = (uint8_t*)malloc(width * height * channels);
+    }
     for (int iy = 0; iy < height; iy++) {
         for (int ix = 0; ix < width; ix++) {
             for (int k = 0; k < channels; k++) {
diff --git a/preprocessing.hpp b/preprocessing.hpp
index 9cace2f44..65797632c 100644
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -218,9 +218,7 @@ bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold,
             ggml_tensor_set_f32(image, gray, ix, iy, 2);
         }
     }
-    uint8_t* output = sd_tensor_to_image(image);
-    free(img.data);
-    img.data = output;
+    sd_tensor_to_image(image, img.data);
     ggml_free(work_ctx);
     return true;
 }

From 0986a2105e9b06295c77226cb4cd2c22fb271163 Mon Sep 17 00:00:00 2001
From: Wagner Bruna <wbruna@users.noreply.github.com>
Date: Sat, 27 Sep 2025 19:28:35 -0300
Subject: [PATCH 2/3] fix: dynamically calculate canny preprocessing context
 sizes

Tested from 64x64 to 2048x2048.

Fixes #253
Fixes #642
---
 preprocessing.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing.hpp b/preprocessing.hpp
index 65797632c..6f499b66d 100644
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -6,7 +6,7 @@
 
 void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
     struct ggml_init_params params;
-    params.mem_size                 = 20 * 1024 * 1024;  // 10
+    params.mem_size                 = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512
     params.mem_buffer               = NULL;
     params.no_alloc                 = false;
     struct ggml_context* ctx0       = ggml_init(params);
@@ -164,7 +164,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo
 
 bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
     struct ggml_init_params params;
-    params.mem_size               = static_cast<size_t>(10 * 1024 * 1024);  // 10MB
+    params.mem_size               = static_cast<size_t>(40 * img.width * img.height);  // 10MB for 512x512
     params.mem_buffer             = NULL;
     params.no_alloc               = false;
     struct ggml_context* work_ctx = ggml_init(params);

From 397c0ffa8975773782a5f656c4b778e17e589109 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 13 Oct 2025 22:01:22 +0800
Subject: [PATCH 3/3] format code

---
 preprocessing.hpp    | 2 +-
 stable-diffusion.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocessing.hpp b/preprocessing.hpp
index 6f499b66d..552aa6424 100644
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -6,7 +6,7 @@
 
 void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
     struct ggml_init_params params;
-    params.mem_size                 = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512
+    params.mem_size                 = 80 * input->ne[0] * input->ne[1];  // 20M for 512x512
     params.mem_buffer               = NULL;
     params.no_alloc                 = false;
     struct ggml_context* ctx0       = ggml_init(params);
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 62b40c6d0..68bfe9ac9 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -1434,7 +1434,7 @@ class StableDiffusionGGML {
             int ne3;
             if (sd_version_is_qwen_image(version)) {
                 ne2 = 1;
-                ne3 = C*x->ne[3];
+                ne3 = C * x->ne[3];
             } else {
                 if (!use_tiny_autoencoder) {
                     C *= 2;