diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 3ff654afb..1a0bd4468 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -1141,6 +1141,14 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx, } __STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor* a) { +#ifdef SD_USE_VULKAN + auto zero_index = ggml_get_tensor(ctx, "ggml_runner_build_in_tensor:zero_int"); + auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a)); + out = ggml_get_rows(ctx, out, zero_index); + out = ggml_reshape(ctx, out, a); + // auto out = ggml_cast(ctx, a, GGML_TYPE_F32); + return out; +#else auto out = ggml_reshape_2d(ctx, a, 1, ggml_nelements(a)); ggml_tensor* one = ggml_ext_ones(ctx, 1, 1, 1, 1); // [1,] if (ggml_is_transposed(out)) { @@ -1148,7 +1156,8 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor* } else { out = ggml_mul_mat(ctx, out, one); } - out = ggml_reshape(ctx, out, a); + out = ggml_reshape(ctx, out, a); +#endif return out; } @@ -1556,6 +1565,9 @@ struct GGMLRunner { std::vector<float> one_vec = {1.f}; ggml_tensor* one_tensor = nullptr; + std::vector<int32_t> zero_int_vec = {0}; + ggml_tensor* zero_int_tensor = nullptr; + std::map<struct ggml_tensor*, const void*> backend_tensor_data_map; std::map<std::string, struct ggml_tensor*> cache_tensor_map; // name -> tensor const std::string final_result_name = "ggml_runner_final_result_tensor"; @@ -1626,10 +1638,15 @@ struct GGMLRunner { one_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, 1); ggml_set_name(one_tensor, "ggml_runner_build_in_tensor:one"); set_backend_tensor_data(one_tensor, one_vec.data()); + + zero_int_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, 1); + ggml_set_name(zero_int_tensor, "ggml_runner_build_in_tensor:zero_int"); + set_backend_tensor_data(zero_int_tensor, zero_int_vec.data()); } void prepare_build_in_tensor_after(struct ggml_cgraph* gf) { ggml_build_forward_expand(gf, one_tensor); + ggml_build_forward_expand(gf, zero_int_tensor); } struct ggml_cgraph* new_graph_custom(size_t graph_size) 
{