From d455ffd2b15cd766ed6f4194884a1dac3cc5c5f4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 25 Nov 2025 22:15:34 +0100 Subject: [PATCH] chore(deps): bump llama.cpp to '583cb83416467e8abf9b37349dcf1f6a0083745a' Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/CMakeLists.txt | 2 +- backend/cpp/llama-cpp/Makefile | 2 +- backend/cpp/llama-cpp/grpc-server.cpp | 6 +++++- backend/cpp/llama-cpp/prepare.sh | 7 ++++--- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt index c839800b86af..d951fc9d6aae 100644 --- a/backend/cpp/llama-cpp/CMakeLists.txt +++ b/backend/cpp/llama-cpp/CMakeLists.txt @@ -57,7 +57,7 @@ add_library(hw_grpc_proto ${hw_proto_srcs} ${hw_proto_hdrs} ) -add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h) +add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h) target_include_directories(${TARGET} PRIVATE ../llava) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 38cf72d4ef6b..1274bcf9d671 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=0c7220db56525d40177fcce3baa0d083448ec813 +LLAMA_VERSION?=583cb83416467e8abf9b37349dcf1f6a0083745a LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 3fbcdae1853f..bd251caedf2d 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -8,6 +8,10 @@ // #include "server.cpp" +#include "server-task.cpp" +#include "server-queue.cpp" +#include "server-common.cpp" + // LocalAI #include "backend.pb.h" @@ -2134,7 +2138,7 @@ class BackendServiceImpl final : public backend::Backend::Service { tasks.reserve(documents.size()); for (size_t i = 0; i < documents.size(); i++) { - auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]); + auto tmp = format_prompt_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]); server_task task = server_task(SERVER_TASK_TYPE_RERANK); task.id = ctx_server.queue_tasks.get_new_id(); task.index = i; diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh index 985b8533bf99..a5cc79cff7b7 100644 --- a/backend/cpp/llama-cpp/prepare.sh +++ b/backend/cpp/llama-cpp/prepare.sh @@ -9,13 +9,14 @@ done set -e +for file in $(ls llama.cpp/tools/server/); do + cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/ +done + cp -r CMakeLists.txt llama.cpp/tools/grpc-server/ cp -r grpc-server.cpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/ -cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/ -cp -rfv llama.cpp/tools/server/server-http.cpp llama.cpp/tools/grpc-server/ -cp -rfv llama.cpp/tools/server/server-http.h llama.cpp/tools/grpc-server/ set +e if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then