diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 385864dd11d5d..390edfe864ce5 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -7100,7 +7100,7 @@ def set_gguf_parameters(self): else: # note: deepseek2 using MLA converts into MQA (ie: GQA with 1 group) self.hparams["num_key_value_heads"] = 1 - + super().set_gguf_parameters() hparams = self.hparams kv_lora_rank = hparams["q_lora_rank"] if hparams["q_lora_rank"] is not None else 512 diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp index cefa39a57c886..ed181a1ab4500 100644 --- a/examples/eval-callback/eval-callback.cpp +++ b/examples/eval-callback/eval-callback.cpp @@ -74,19 +74,19 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne } } for (int64_t i3 = 0; i3 < ne[3]; i3++) { - LOG(" [\n"); + LOG(" [\n"); for (int64_t i2 = 0; i2 < ne[2]; i2++) { if (i2 == n && ne[2] > 2*n) { - LOG(" ..., \n"); + LOG(" ..., \n"); i2 = ne[2] - n; } - LOG(" [\n"); + LOG(" [\n"); for (int64_t i1 = 0; i1 < ne[1]; i1++) { if (i1 == n && ne[1] > 2*n) { - LOG(" ..., \n"); + LOG(" ..., \n"); i1 = ne[1] - n; } - LOG(" ["); + LOG(" ["); for (int64_t i0 = 0; i0 < ne[0]; i0++) { if (i0 == n && ne[0] > 2*n) { LOG("..., "); @@ -98,10 +98,10 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne } LOG("],\n"); } - LOG(" ],\n"); + LOG(" ],\n"); } - LOG(" ]\n"); - LOG(" sum = %f\n", sum); + LOG(" ]\n"); + LOG(" sum = %f\n", sum); } // TODO: make this abort configurable/optional? @@ -136,7 +136,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) { snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, ggml_ne_string(src1).c_str()); } - LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, + LOG("%s: %16s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type), ggml_op_desc(t), src0->name, ggml_ne_string(src0).c_str(), src1 ? src1_str : "", diff --git a/src/models/deepseek2.cpp b/src/models/deepseek2.cpp index 375f3594541bb..bc1b2127acd96 100644 --- a/src/models/deepseek2.cpp +++ b/src/models/deepseek2.cpp @@ -47,6 +47,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr // self_attention if (is_ocr) { const int n_embed_head = hparams.n_embd / hparams.n_head(); + const int ocr_rope_type = GGML_ROPE_TYPE_NEOX; GGML_ASSERT(n_embed_head == n_embd_head_k && n_embed_head == n_embd_head_v); ggml_tensor * Qcur = NULL; @@ -65,8 +66,8 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr Vcur = ggml_reshape_3d(ctx0, Vcur, n_embed_head, n_head, n_tokens); GGML_ASSERT(fabs(freq_base - 10000.0) < 1e-4); - Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_embed_head, rope_type, 0, freq_base, 1, 0, 1, 0, 0); - Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_embed_head, rope_type, 0, freq_base, 1, 0, 1, 0, 0); + Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_embed_head, ocr_rope_type, 0, freq_base, 1, 0, 1, 0, 0); + Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_embed_head, ocr_rope_type, 0, freq_base, 1, 0, 1, 0, 0); cb(Qcur, "q_pe", il); cb(Kcur, "k_pe", il);