diff --git a/ggml_extend.hpp b/ggml_extend.hpp index a125357b4..e01d41bfe 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -372,12 +372,14 @@ __STATIC_INLINE__ float sigmoid(float x) { // SPECIAL OPERATIONS WITH TENSORS -__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) { +__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input, uint8_t* image_data = nullptr) { int64_t width = input->ne[0]; int64_t height = input->ne[1]; int64_t channels = input->ne[2]; GGML_ASSERT(channels == 3 && input->type == GGML_TYPE_F32); - uint8_t* image_data = (uint8_t*)malloc(width * height * channels); + if (image_data == nullptr) { + image_data = (uint8_t*)malloc(width * height * channels); + } for (int iy = 0; iy < height; iy++) { for (int ix = 0; ix < width; ix++) { for (int k = 0; k < channels; k++) { diff --git a/preprocessing.hpp b/preprocessing.hpp index 9cace2f44..552aa6424 100644 --- a/preprocessing.hpp +++ b/preprocessing.hpp @@ -6,7 +6,7 @@ void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) { struct ggml_init_params params; - params.mem_size = 20 * 1024 * 1024; // 10 + params.mem_size = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512 params.mem_buffer = NULL; params.no_alloc = false; struct ggml_context* ctx0 = ggml_init(params); @@ -164,7 +164,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) { struct ggml_init_params params; - params.mem_size = static_cast(10 * 1024 * 1024); // 10MB + params.mem_size = static_cast(40 * img.width * img.height); // 10MB for 512x512 params.mem_buffer = NULL; params.no_alloc = false; struct ggml_context* work_ctx = ggml_init(params); @@ -218,9 +218,7 @@ bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, ggml_tensor_set_f32(image, gray, ix, iy, 2); } } - uint8_t* output = sd_tensor_to_image(image); - free(img.data); - img.data = output; + sd_tensor_to_image(image, img.data); ggml_free(work_ctx); return true; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 62b40c6d0..68bfe9ac9 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1434,7 +1434,7 @@ class StableDiffusionGGML { int ne3; if (sd_version_is_qwen_image(version)) { ne2 = 1; - ne3 = C*x->ne[3]; + ne3 = C * x->ne[3]; } else { if (!use_tiny_autoencoder) { C *= 2;