diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 790032d60316..7ee357bdee20 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -1394,7 +1394,16 @@ class BackendServiceImpl final : public backend::Backend::Service { if (error) { return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results"); } + // Sort responses by score in descending order + std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) { + return a.value("score", 0.0f) > b.value("score", 0.0f); + }); + // Crop results by request.top_n if specified + int top_n = request->top_n(); + if (top_n > 0 && top_n < static_cast<int>(responses.size())) { + responses = json(responses.begin(), responses.begin() + top_n); + } // Set usage information backend::Usage* usage = rerankResult->mutable_usage(); int total_tokens = 0;