From 98595a633262ae820c97899dd1cf4008bf3b35db Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 26 Sep 2025 22:06:26 +0200
Subject: [PATCH] chore(deps): bump llama.cpp to
 '835b2b915c52bcabcd688d025eacff9a07b65f52'

Signed-off-by: Ettore Di Giacinto
---
 backend/cpp/llama-cpp/Makefile        |  2 +-
 backend/cpp/llama-cpp/grpc-server.cpp | 12 +++---------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index f1bc8f51895c..f9faeb7a3918 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=4ae88d07d026e66b41e85afece74e88af54f4e66
+LLAMA_VERSION?=835b2b915c52bcabcd688d025eacff9a07b65f52
 
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 93bb07e2a1ea..36565abf1ec5 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -802,11 +802,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
             return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
         }
 
-        // Tokenize the query
-        auto tokenized_query = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, request->query(), /* add_special */ false, true);
-        if (tokenized_query.size() != 1) {
-            return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"query\" must contain only a single prompt");
-        }
         // Create and queue the task
         json responses = json::array();
         bool error = false;
@@ -818,10 +813,9 @@
             documents.push_back(request->documents(i));
         }
 
-        auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, documents, /* add_special */ false, true);
-        tasks.reserve(tokenized_docs.size());
-        for (size_t i = 0; i < tokenized_docs.size(); i++) {
-            auto tmp = format_rerank(ctx_server.vocab, tokenized_query[0], tokenized_docs[i]);
+        tasks.reserve(documents.size());
+        for (size_t i = 0; i < documents.size(); i++) {
+            auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
             server_task task = server_task(SERVER_TASK_TYPE_RERANK);
             task.id = ctx_server.queue_tasks.get_new_id();
             task.index = i;
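
The grpc-server.cpp hunks track the upstream llama.cpp API change pulled in by this
bump: format_rerank now receives the raw query and document strings (together with
the model, vocab, and multimodal context) and handles tokenization itself, so the
caller-side tokenize_input_prompts pre-pass and the single-prompt query check are
dropped at the call site. Below is a minimal sketch of the new call-site shape with
stubbed-in placeholder types; the upstream declarations (llama_model, llama_vocab,
mtmd_context, and format_rerank's real return type) are assumptions here, not
verbatim llama.cpp signatures.

#include <cstdio>
#include <string>
#include <vector>

// Opaque stand-ins for the llama.cpp types the real call site passes through.
struct llama_model;
struct llama_vocab;
struct mtmd_context;

// Hypothetical shape of the new format_rerank: raw strings in, one rerank
// prompt out. The real upstream function builds and tokenizes the prompt
// itself; the std::string return type is a placeholder for illustration.
static std::string format_rerank(const llama_model * /*model*/,
                                 const llama_vocab * /*vocab*/,
                                 mtmd_context * /*mctx*/,
                                 const std::string & query,
                                 const std::string & doc) {
    return query + "\n" + doc; // placeholder body
}

int main() {
    const std::string query = "what is llama.cpp?";
    const std::vector<std::string> documents = {"a C++ inference engine",
                                                "a camelid"};

    // Mirrors the patched loop: one rerank task per raw document string,
    // with no caller-side tokenize_input_prompts pre-pass.
    for (size_t i = 0; i < documents.size(); i++) {
        auto tmp = format_rerank(nullptr, nullptr, nullptr, query, documents[i]);
        std::printf("task %zu prompt: %s\n", i, tmp.c_str());
    }
    return 0;
}

Moving tokenization behind format_rerank presumably lets upstream own rerank prompt
construction in one place rather than having every server frontend duplicate the
tokenize-then-format dance, which is why a dependency bump touches server code here.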