diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index 44de7b62b10a..62dfd9d3ffad 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -690,6 +690,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -1491,6 +1504,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1504,6 +1530,19 @@ include: backend: "sam3-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-rfdetr-cpp' + base-image: "ubuntu:24.04" + ubuntu-version: '2404' + runs-on: 'ubuntu-24.04-arm' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -2635,6 +2674,74 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + # rfdetr-cpp + - build-type: '' + cuda-major-version: "" + 
cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-cpu-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f32-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f16-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + platform-tag: 'amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-rfdetr-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/arm64' + platform-tag: 'arm64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-rfdetr-cpp' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -2715,6 +2822,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" 
ubuntu-version: '2204' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-rfdetr-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "rfdetr-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2204' # whisper - build-type: '' cuda-major-version: "" diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index c8b95fb03636..3754d2b39e98 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -50,6 +50,10 @@ jobs: variable: "SAM3_VERSION" branch: "main" file: "backend/go/sam3-cpp/Makefile" + - repository: "mudler/rf-detr.cpp" + variable: "RFDETR_VERSION" + branch: "main" + file: "backend/go/rfdetr-cpp/Makefile" - repository: "predict-woo/qwen3-tts.cpp" variable: "QWEN3TTS_CPP_VERSION" branch: "main" diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 3fd8365747e5..723ef57070cb 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -37,6 +37,7 @@ jobs: sglang: ${{ steps.detect.outputs.sglang }} acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }} qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }} + rfdetr-cpp: ${{ steps.detect.outputs.rfdetr-cpp }} vibevoice-cpp: ${{ steps.detect.outputs.vibevoice-cpp }} localvqe: ${{ steps.detect.outputs.localvqe }} voxtral: ${{ steps.detect.outputs.voxtral }} @@ -843,6 +844,42 @@ jobs: - name: Test qwen3-tts-cpp run: | make --jobs=5 --output-sync=target -C backend/go/qwen3-tts-cpp test + # Per-backend smoke for rfdetr-cpp: builds the .so + Go binary and runs + # `make -C backend/go/rfdetr-cpp test`. test.sh fetches the small (~20 MB) + # rfdetr-nano-q8_0 GGUF from the published mudler/rfdetr-cpp-nano HF repo + # via curl and synthesises a tiny PNG to exercise the wire protocol. 
+ tests-rfdetr-cpp: + needs: detect-changes + if: needs.detect-changes.outputs.rfdetr-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true' + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake curl libopenblas-dev + - name: Setup Go + uses: actions/setup-go@v5 + - name: Display Go version + run: go version + - name: Proto Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go + - name: Build rfdetr-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/rfdetr-cpp + - name: Test rfdetr-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/rfdetr-cpp test # Per-backend smoke for vibevoice-cpp: builds the .so + Go binary and # runs `make -C backend/go/vibevoice-cpp test`. 
test.sh auto-downloads # the published mudler/vibevoice.cpp-models bundle (TTS Q8_0 + ASR Q4_K diff --git a/Makefile b/Makefile index 3eebc1871925..793f623b607b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin 
backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio GOCMD=go GOTEST=$(GOCMD) test @@ -481,6 +481,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/insightface $(MAKE) -C backend/python/speaker-recognition $(MAKE) -C backend/rust/kokoros kokoros-grpc + $(MAKE) -C backend/go/rfdetr-cpp test-extra: prepare-test-extra $(MAKE) -C backend/python/transformers test @@ -507,6 +508,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/insightface test $(MAKE) -C backend/python/speaker-recognition test $(MAKE) -C backend/rust/kokoros test + $(MAKE) -C backend/go/rfdetr-cpp test ## ## End-to-end gRPC tests that exercise a built backend container image. 
@@ -1119,6 +1121,7 @@ BACKEND_KOKOROS = kokoros|rust|.|false|true # C++ backends (Go wrapper with purego) BACKEND_SAM3_CPP = sam3-cpp|golang|.|false|true +BACKEND_RFDETR_CPP = rfdetr-cpp|golang|.|false|true # Helper function to build docker image for a backend # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG) @@ -1198,13 +1201,14 @@ $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION))) $(eval $(call generate-docker-build-target,$(BACKEND_TINYGRAD))) $(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS))) $(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP))) +$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_SHERPA_ONNX))) # Pattern rule for docker-save targets docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy +docker-build-backends: docker-build-llama-cpp 
docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-rfdetr-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/go/rfdetr-cpp/.gitignore b/backend/go/rfdetr-cpp/.gitignore new file mode 100644 index 000000000000..c5eb3d0d11f4 --- /dev/null +++ b/backend/go/rfdetr-cpp/.gitignore @@ -0,0 +1,7 @@ +sources/ +build*/ +package/ +librfdetrcpp*.so +rfdetr-cpp +test-models/ +test-data/ diff --git a/backend/go/rfdetr-cpp/CMakeLists.txt b/backend/go/rfdetr-cpp/CMakeLists.txt new file mode 100644 index 000000000000..aaf8cf62580d --- /dev/null +++ b/backend/go/rfdetr-cpp/CMakeLists.txt @@ -0,0 +1,79 @@ +cmake_minimum_required(VERSION 3.18) +project(librfdetrcpp LANGUAGES C CXX) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Static-link ggml + rfdetr so the resulting .so has no runtime dependency on +# extra ggml/rfdetr shared libraries — only on libc/libstdc++/libgomp, which +# the LocalAI package step bundles into the docker image. 
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE) + +# rfdetr.cpp build switches: skip CLI/tests, keep static lib. +set(RFDETR_BUILD_CLI OFF CACHE BOOL "Disable rfdetr CLI" FORCE) +set(RFDETR_BUILD_TESTS OFF CACHE BOOL "Disable rfdetr tests" FORCE) +set(RFDETR_SHARED OFF CACHE BOOL "Build rfdetr as static lib" FORCE) + +# rt-detr.cpp's top-level CMakeLists invokes +# `bash ${CMAKE_SOURCE_DIR}/scripts/apply_ggml_patches.sh` to apply its +# in-tree ggml patches before descending into the submodule. When we +# `add_subdirectory` it from a parent project, `CMAKE_SOURCE_DIR` points +# at *our* directory, not theirs, so the script path resolves wrong. +# +# Run the patches script ourselves up front (it's idempotent — re-running +# is a no-op once patches are applied) so the rt-detr.cpp configure step +# is essentially a no-op for the patch hook. +set(RFDETR_CPP_SRC ${CMAKE_CURRENT_SOURCE_DIR}/sources/rt-detr.cpp) +if(EXISTS ${RFDETR_CPP_SRC}/scripts/apply_ggml_patches.sh) + execute_process( + COMMAND bash ${RFDETR_CPP_SRC}/scripts/apply_ggml_patches.sh + RESULT_VARIABLE _rfdetr_patch_result + OUTPUT_VARIABLE _rfdetr_patch_output + ERROR_VARIABLE _rfdetr_patch_error + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) + if(NOT _rfdetr_patch_result EQUAL 0) + message(FATAL_ERROR + "Failed to apply ggml patches (exit ${_rfdetr_patch_result}):\n" + "stdout:\n${_rfdetr_patch_output}\n" + "stderr:\n${_rfdetr_patch_error}") + endif() + message(STATUS "${_rfdetr_patch_output}") +endif() + +# Stage a shim 'scripts/apply_ggml_patches.sh' under our source dir so that +# rt-detr.cpp's CMakeLists — which calls +# bash ${CMAKE_SOURCE_DIR}/scripts/apply_ggml_patches.sh +# — finds an idempotent no-op there. The real patches have already been +# applied above; this just satisfies the path lookup. 
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts)
+file(WRITE ${CMAKE_CURRENT_SOURCE_DIR}/scripts/apply_ggml_patches.sh
+"#!/usr/bin/env bash
+# Shim - patches were already applied by the parent CMakeLists.
+exit 0
+")
+execute_process(COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/scripts/apply_ggml_patches.sh)
+
+add_subdirectory(./sources/rt-detr.cpp)
+
+# rfdetr.cpp's C-API lives in src/rfdetr_capi.cpp. Compile that file directly
+# into a MODULE library and link the static rfdetr + ggml archives into it (no
+# --whole-archive is used) so the rfdetr_capi_* symbols ship in the dlopen'ed .so.
+add_library(rfdetrcpp MODULE
+    sources/rt-detr.cpp/src/rfdetr_capi.cpp)
+
+target_include_directories(rfdetrcpp PRIVATE
+    sources/rt-detr.cpp/include
+    sources/rt-detr.cpp/src
+    sources/rt-detr.cpp/third_party/stb
+)
+
+target_link_libraries(rfdetrcpp PRIVATE rfdetr ggml)
+
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
+    target_link_libraries(rfdetrcpp PRIVATE stdc++fs)
+endif()
+
+set_property(TARGET rfdetrcpp PROPERTY CXX_STANDARD 17)
+set_target_properties(rfdetrcpp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
diff --git a/backend/go/rfdetr-cpp/Makefile b/backend/go/rfdetr-cpp/Makefile
new file mode 100644
index 000000000000..413d0085d8ee
--- /dev/null
+++ b/backend/go/rfdetr-cpp/Makefile
@@ -0,0 +1,135 @@
+CMAKE_ARGS?=
+BUILD_TYPE?=
+NATIVE?=false
+
+GOCMD?=go
+GO_TAGS?=
+JOBS?=$(shell nproc --ignore=1)
+
+# rt-detr.cpp (GitHub redirects the historical mudler/rt-detr.cpp to the new
+# mudler/rf-detr.cpp slug). Pin to a specific commit if you need a stable
+# build; leaving this on `main` always picks up the latest C-API surface
+# (incl. the per-detection accessor functions used by gorfdetrcpp.go).
+RFDETR_REPO?=https://github.com/mudler/rf-detr.cpp.git
+RFDETR_VERSION?=main
+
+ifeq ($(NATIVE),false)
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+endif
+
+# Forward LocalAI's BUILD_TYPE to the matching ggml backend switch.
+ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON -DRFDETR_GGML_CUDA=ON +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON +else ifeq ($(BUILD_TYPE),hipblas) + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DRFDETR_GGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) +else ifeq ($(BUILD_TYPE),vulkan) + CMAKE_ARGS+=-DGGML_VULKAN=ON -DRFDETR_GGML_VULKAN=ON +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + CMAKE_ARGS+=-DRFDETR_GGML_METAL=ON + endif +endif + +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx +endif + +sources/rt-detr.cpp: + mkdir -p sources && \ + git clone --recursive $(RFDETR_REPO) sources/rt-detr.cpp && \ + cd sources/rt-detr.cpp && \ + git checkout $(RFDETR_VERSION) && \ + git submodule update --init --recursive --depth 1 --single-branch + +# Detect OS +UNAME_S := $(shell uname -s) + +# Only build CPU variants on Linux +ifeq ($(UNAME_S),Linux) + VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so +else + # On non-Linux (e.g., Darwin), build only fallback variant + VARIANT_TARGETS = librfdetrcpp-fallback.so +endif + +rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS) + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o rfdetr-cpp ./ + +package: rfdetr-cpp + bash package.sh + +build: package + +clean: purge + rm -rf librfdetrcpp*.so rfdetr-cpp 
package sources + +purge: + rm -rf build* + +# Build all variants (Linux only) +ifeq ($(UNAME_S),Linux) +librfdetrcpp-avx.so: sources/rt-detr.cpp + rm -rfv build-$@ + $(info ${GREEN}I rfdetr-cpp build info:avx${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom + rm -rfv build-$@ + +librfdetrcpp-avx2.so: sources/rt-detr.cpp + rm -rfv build-$@ + $(info ${GREEN}I rfdetr-cpp build info:avx2${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) librfdetrcpp-custom + rm -rfv build-$@ + +librfdetrcpp-avx512.so: sources/rt-detr.cpp + rm -rfv build-$@ + $(info ${GREEN}I rfdetr-cpp build info:avx512${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) librfdetrcpp-custom + rm -rfv build-$@ +endif + +# Build fallback variant (all platforms) +librfdetrcpp-fallback.so: sources/rt-detr.cpp + rm -rfv build-$@ + $(info ${GREEN}I rfdetr-cpp build info:fallback${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom + rm -rfv build-$@ + +librfdetrcpp-custom: CMakeLists.txt + mkdir -p build-$(SO_TARGET) && \ + cd build-$(SO_TARGET) && \ + cmake .. $(CMAKE_ARGS) && \ + cmake --build . --config Release -j$(JOBS) && \ + cd .. && \ + mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) + +all: rfdetr-cpp package + +# `test` is invoked by the top-level Makefile's `test-extra` target. 
It builds +# the backend binary + the fallback shared library (needed for dlopen at +# runtime), then runs test.sh which downloads the test models + COCO image +# and exercises the gRPC Load/Detect wire path via the Go smoke test in +# main_test.go for both the detection and segmentation models. +test: rfdetr-cpp librfdetrcpp-fallback.so + bash test.sh diff --git a/backend/go/rfdetr-cpp/gorfdetrcpp.go b/backend/go/rfdetr-cpp/gorfdetrcpp.go new file mode 100644 index 000000000000..ae45319e188c --- /dev/null +++ b/backend/go/rfdetr-cpp/gorfdetrcpp.go @@ -0,0 +1,195 @@ +package main + +// gorfdetrcpp.go - gRPC handlers (Load, Detect) for the rfdetr-cpp backend. +// +// Embeds base.SingleThread to default unimplemented RPCs to "not supported" +// while we only implement object detection. + +import ( + "encoding/base64" + "fmt" + "os" + "path/filepath" + "strconv" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +// Default upper bound on detections returned per image. RF-DETR's decoder +// queries are limited to a few hundred; 300 is a safe ceiling. +const defaultTopK = 300 + +// rfdetr_handle_t is a uintptr-typed opaque handle (see include/rfdetr_capi.h). 
+var (
+	// Function pointers for the rfdetr_capi_* C symbols. They are nil until
+	// main() binds them with purego.RegisterLibFunc, which happens before the
+	// gRPC server is started.
+
+	// rfdetr_capi_load(const char* model_path, int n_threads, rfdetr_handle_t* out_handle) -> int
+	CapiLoad func(modelPath string, nThreads int32, outHandle *uintptr) int32
+	// rfdetr_capi_unload(rfdetr_handle_t handle) -> int
+	CapiUnload func(handle uintptr) int32
+	// rfdetr_capi_detect_path(handle, image_path, threshold, top_k, out_json) -> int
+	CapiDetectPath func(handle uintptr, imagePath string, threshold float32, topK uint32, outJSON *uintptr) int32
+	// rfdetr_capi_detect_buffer(handle, bytes, len, threshold, top_k, out_json) -> int
+	// Bound in main() but not called by the handlers in this file.
+	CapiDetectBuffer func(handle uintptr, bytes uintptr, length uintptr, threshold float32, topK uint32, outJSON *uintptr) int32
+	// rfdetr_capi_free_string(char* s)
+	// Used to release the out_json string returned by the detect calls.
+	CapiFreeString func(s uintptr)
+	// rfdetr_capi_get_n_detections(handle) -> int
+	CapiGetNDetections func(handle uintptr) int32
+	// rfdetr_capi_get_detection_class_id(handle, i) -> int
+	CapiGetDetectionClassID func(handle uintptr, i int32) int32
+	// rfdetr_capi_get_detection_box(handle, i, out_xyxy[4]) -> int (0 on success)
+	CapiGetDetectionBox func(handle uintptr, i int32, outXYXY uintptr) int32
+	// rfdetr_capi_get_detection_score(handle, i) -> float
+	CapiGetDetectionScore func(handle uintptr, i int32) float32
+	// rfdetr_capi_get_detection_class_name(handle, i, buf, buf_size) -> int (needed/written; two-call sizing)
+	CapiGetDetectionClassName func(handle uintptr, i int32, buf uintptr, bufSize int32) int32
+	// rfdetr_capi_get_detection_mask_png(handle, i, buf, buf_size) -> int (needed/written; 0 means no mask)
+	CapiGetDetectionMaskPNG func(handle uintptr, i int32, buf uintptr, bufSize int32) int32
+)
+
+// RFDetrCpp is the gRPC backend implementation served by main(). It embeds
+// base.SingleThread and implements Load and Detect on top of the C API.
+type RFDetrCpp struct {
+	base.SingleThread
+	// handle is the opaque model handle produced by rfdetr_capi_load;
+	// 0 means no model is currently loaded.
+	handle uintptr
+}
+
+// Load loads the GGUF model at opts.ModelFile (falling back to opts.Model when
+// ModelFile is empty; joined with opts.ModelPath if relative) and stores the
+// handle for later Detect calls. A non-positive opts.Threads defaults to 4, and
+// any previously loaded model is unloaded before the new one is loaded.
+func (r *RFDetrCpp) Load(opts *pb.ModelOptions) error {
+	// Prefer ModelFile; fall back to Model when ModelFile is not set.
+	modelFile := opts.ModelFile
+	if modelFile == "" {
+		modelFile = opts.Model
+	}
+	if modelFile == "" {
+		return fmt.Errorf("rfdetr-cpp: ModelFile is empty")
+	}
+
+	// Relative names are resolved against opts.ModelPath.
+	var modelPath string
+	if filepath.IsAbs(modelFile) {
+		modelPath = modelFile
+	} else {
+		modelPath = filepath.Join(opts.ModelPath, modelFile)
+	}
+
+	// Check the path on the Go side so a missing file surfaces as a wrapped
+	// stat error instead of only a numeric return code from the C loader.
+	if _, err := os.Stat(modelPath); err != nil {
+		return fmt.Errorf("rfdetr-cpp: model file not found: %s: %w", modelPath, err)
+	}
+
+	// Non-positive thread counts fall back to 4.
+	threads := opts.Threads
+	if threads <= 0 {
+		threads = 4
+	}
+
+	// Release previous model if any (re-Load). The old handle is dropped before
+	// the new load is attempted, so a failed re-Load leaves no model loaded.
+	if r.handle != 0 {
+		CapiUnload(r.handle)
+		r.handle = 0
+	}
+
+	var h uintptr
+	rc := CapiLoad(modelPath, threads, &h)
+	if rc != 0 || h == 0 {
+		return fmt.Errorf("rfdetr-cpp: rfdetr_capi_load failed with rc=%d for %s", rc, modelPath)
+	}
+	r.handle = h
+	return nil
+}
+
+// Detect runs object detection on the base64-encoded image in opts.Src at
+// opts.Threshold (0.5 when opts.Threshold <= 0) and returns one pb.Detection
+// per result, with the library's x1/y1/x2/y2 box converted to
+// x/y/width/height. Detections whose box cannot be read are skipped. When the
+// library reports a mask for a detection, Detection.Mask carries those bytes
+// (PNG-encoded according to the C API).
+//
+// An error is returned when no model is loaded, opts.Src is not valid base64,
+// the temporary image file cannot be written, or the C API reports a failure.
+func (r *RFDetrCpp) Detect(opts *pb.DetectOptions) (pb.DetectResponse, error) {
+	if r.handle == 0 {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: model not loaded")
+	}
+
+	// Decode base64 image and write to temp file.
+	imgData, err := base64.StdEncoding.DecodeString(opts.Src)
+	if err != nil {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to decode base64 image: %w", err)
+	}
+
+	tmpFile, err := os.CreateTemp("", "rfdetr-*.img")
+	if err != nil {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to create temp file: %w", err)
+	}
+	// Best-effort removal; the file is only needed for the duration of this call.
+	defer func() { _ = os.Remove(tmpFile.Name()) }()
+
+	if _, err := tmpFile.Write(imgData); err != nil {
+		_ = tmpFile.Close()
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to write temp file: %w", err)
+	}
+	// Close before handing the path to the C side so the data is flushed.
+	if err := tmpFile.Close(); err != nil {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to close temp file: %w", err)
+	}
+
+	threshold := opts.Threshold
+	if threshold <= 0 {
+		threshold = 0.5
+	}
+
+	// JSON output from detect_path is unused: we read structured detections via
+	// the accessor functions. Still must free the returned string.
+	var jsonPtr uintptr
+	rc := CapiDetectPath(r.handle, tmpFile.Name(), threshold, uint32(defaultTopK), &jsonPtr)
+	if jsonPtr != 0 {
+		CapiFreeString(jsonPtr)
+	}
+	if rc != 0 {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: detect failed with rc=%d", rc)
+	}
+
+	n := CapiGetNDetections(r.handle)
+	if n < 0 {
+		return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: invalid n_detections=%d", n)
+	}
+
+	detections := make([]*pb.Detection, 0, n)
+	for i := int32(0); i < n; i++ {
+		var bbox [4]float32 // x1, y1, x2, y2
+		// A detection without a readable box is dropped rather than failing
+		// the whole request.
+		if rc := CapiGetDetectionBox(r.handle, i, uintptr(unsafe.Pointer(&bbox[0]))); rc != 0 {
+			continue
+		}
+		cid := CapiGetDetectionClassID(r.handle, i)
+		score := CapiGetDetectionScore(r.handle, i)
+
+		// Two-call sizing for class_name: the first call (no buffer) reports the
+		// bytes needed including the trailing NUL, the second fills the buffer.
+		var className string
+		nameSize := CapiGetDetectionClassName(r.handle, i, 0, 0)
+		if nameSize > 1 {
+			buf := make([]byte, nameSize)
+			written := CapiGetDetectionClassName(r.handle, i, uintptr(unsafe.Pointer(&buf[0])), nameSize)
+			// Trust the buffer only when the fill call reports a size that fits;
+			// strip the NUL. On any other result leave className empty so the
+			// class-id fallback below applies instead of returning NUL bytes.
+			if written > 0 && written <= nameSize {
+				className = string(buf[:written-1])
+			}
+		}
+		if className == "" {
+			className = strconv.Itoa(int(cid))
+		}
+
+		// Two-call sizing for mask PNG (returns 0 when no mask).
+		var mask []byte
+		maskSize := CapiGetDetectionMaskPNG(r.handle, i, 0, 0)
+		if maskSize > 0 {
+			maskBuf := make([]byte, maskSize)
+			written := CapiGetDetectionMaskPNG(r.handle, i, uintptr(unsafe.Pointer(&maskBuf[0])), maskSize)
+			// Keep only the bytes the library reports as written; a failed or
+			// oversized fill yields no mask rather than a zero-filled blob.
+			if written > 0 && written <= maskSize {
+				mask = maskBuf[:written]
+			}
+		}
+
+		detections = append(detections, &pb.Detection{
+			X:          bbox[0],
+			Y:          bbox[1],
+			Width:      bbox[2] - bbox[0],
+			Height:     bbox[3] - bbox[1],
+			Confidence: score,
+			ClassName:  className,
+			Mask:       mask,
+		})
+	}
+
+	return pb.DetectResponse{
+		Detections: detections,
+	}, nil
+}
diff --git a/backend/go/rfdetr-cpp/main.go b/backend/go/rfdetr-cpp/main.go
new file mode 100644
index 000000000000..3c95df1c257a
--- /dev/null
+++ b/backend/go/rfdetr-cpp/main.go
@@ -0,0 +1,61 @@
+package main
+
+// main.go - entry point for the rfdetr-cpp gRPC backend.
+//
+// Dlopens librfdetrcpp-<variant>.so via purego at the path in
+// RFDETR_LIBRARY (set by run.sh based on /proc/cpuinfo), registers the
+// rfdetr_capi_* C ABI symbols, then starts the gRPC server.
+
+import (
+	"flag"
+	"os"
+
+	"github.com/ebitengine/purego"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	// addr is handed to grpc.StartServer in main().
+	// NOTE(review): the help text says "connect to", but the value appears to
+	// be the address the server listens on - confirm before rewording.
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+// LibFuncs pairs a pointer to one of the Capi* function variables with the
+// name of the C symbol it is bound to.
+type LibFuncs struct {
+	FuncPtr any
+	Name    string
+}
+
+// main opens the shared library named by RFDETR_LIBRARY (or the fallback
+// variant in the working directory), binds every rfdetr_capi_* symbol to its
+// Go function variable, parses flags and starts the gRPC server. It panics if
+// the library cannot be opened or the server fails to start.
+func main() {
+	// Get library name from environment variable, default to fallback
+	libName := os.Getenv("RFDETR_LIBRARY")
+	if libName == "" {
+		libName = "./librfdetrcpp-fallback.so"
+	}
+
+	// The library is opened before flag.Parse(), so a bad RFDETR_LIBRARY
+	// panics before any command-line flag is read.
+	rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if err != nil {
+		panic(err)
+	}
+
+	// Every Capi* variable used by the handlers must appear here; an unbound
+	// variable stays nil.
+	libFuncs := []LibFuncs{
+		{&CapiLoad, "rfdetr_capi_load"},
+		{&CapiUnload, "rfdetr_capi_unload"},
+		{&CapiDetectPath, "rfdetr_capi_detect_path"},
+		{&CapiDetectBuffer, "rfdetr_capi_detect_buffer"},
+		{&CapiFreeString, "rfdetr_capi_free_string"},
+		{&CapiGetNDetections, "rfdetr_capi_get_n_detections"},
+		{&CapiGetDetectionClassID, "rfdetr_capi_get_detection_class_id"},
+		{&CapiGetDetectionBox, "rfdetr_capi_get_detection_box"},
+		{&CapiGetDetectionScore, "rfdetr_capi_get_detection_score"},
+		{&CapiGetDetectionClassName, "rfdetr_capi_get_detection_class_name"},
+		{&CapiGetDetectionMaskPNG, "rfdetr_capi_get_detection_mask_png"},
+	}
+
+	for _, lf := range libFuncs {
+		purego.RegisterLibFunc(lf.FuncPtr, rfdetrLib, lf.Name)
+	}
+
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &RFDetrCpp{}); err != nil {
+		panic(err)
+	}
+}
diff --git a/backend/go/rfdetr-cpp/main_test.go b/backend/go/rfdetr-cpp/main_test.go
new file mode 100644
index 000000000000..3e1c1eed1d64
--- /dev/null
+++ b/backend/go/rfdetr-cpp/main_test.go
@@ -0,0 +1,220 @@
+package main
+
+// main_test.go - end-to-end smoke test for the rfdetr-cpp gRPC backend.
+//
+// Spawns the compiled rfdetr-cpp binary on a free local port, dials it via
+// gRPC, and exercises LoadModel + Detect against the test fixtures
+// downloaded by test.sh. Two scenarios:
+//
+//  1. 
detection — loads rfdetr-nano-q8_0.gguf and asserts at least one +// detection comes back with a non-empty class name and a bounding box +// of non-zero size. +// 2. segmentation — loads rfdetr-seg-nano-q8_0.gguf and additionally +// asserts that at least one detection carries a PNG-encoded mask blob +// (verified by PNG magic bytes). +// +// Both specs Skip cleanly if their fixtures are missing so the test target +// stays usable on a fresh checkout where models haven't been downloaded. + +import ( + "context" + "encoding/base64" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestRFDetrCpp(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "rfdetr-cpp backend smoke suite") +} + +// freePort grabs an ephemeral TCP port and immediately releases it so the +// spawned backend can bind to it. There is a tiny TOCTOU window here but in +// practice it's adequate for a smoke test on a quiet runner. +func freePort() int { + l, err := net.Listen("tcp", "127.0.0.1:0") + Expect(err).ToNot(HaveOccurred(), "freePort listen") + port := l.Addr().(*net.TCPAddr).Port + Expect(l.Close()).To(Succeed()) + return port +} + +// startBackend spawns the rfdetr-cpp binary on the given port and waits +// until it accepts TCP connections (up to 10s). The returned cleanup func +// kills the process and reaps it. 
+func startBackend(port int) func() { + binary, err := filepath.Abs("./rfdetr-cpp") + Expect(err).ToNot(HaveOccurred()) + if _, err := os.Stat(binary); err != nil { + Skip(fmt.Sprintf("backend binary not built: %s (run `make rfdetr-cpp` first)", binary)) + } + + libPath, err := filepath.Abs("./librfdetrcpp-fallback.so") + Expect(err).ToNot(HaveOccurred()) + if _, err := os.Stat(libPath); err != nil { + Skip(fmt.Sprintf("fallback library not built: %s (run `make librfdetrcpp-fallback.so` first)", libPath)) + } + + addr := fmt.Sprintf("127.0.0.1:%d", port) + cmd := exec.Command(binary, "--addr", addr) + cmd.Env = append(os.Environ(), "RFDETR_LIBRARY="+libPath) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + Expect(cmd.Start()).To(Succeed()) + + cleanup := func() { + if cmd.Process != nil { + _ = cmd.Process.Kill() + _, _ = cmd.Process.Wait() + } + } + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + c, err := net.DialTimeout("tcp", addr, 200*time.Millisecond) + if err == nil { + _ = c.Close() + return cleanup + } + time.Sleep(200 * time.Millisecond) + } + + cleanup() + Fail(fmt.Sprintf("backend did not become ready on %s within 10s", addr)) + return func() {} +} + +// loadTestImage reads the COCO test image downloaded by test.sh and returns +// its base64-encoded content (the wire format accepted by the Detect RPC). +func loadTestImage() string { + imgPath, err := filepath.Abs("test-data/test.jpg") + Expect(err).ToNot(HaveOccurred()) + imgBytes, err := os.ReadFile(imgPath) + if err != nil { + Skip(fmt.Sprintf("test image not present: %s (run test.sh first)", imgPath)) + } + return base64.StdEncoding.EncodeToString(imgBytes) +} + +// dialBackend opens a gRPC client connection to the spawned backend. 
+func dialBackend(port int) (pb.BackendClient, func()) { + addr := fmt.Sprintf("127.0.0.1:%d", port) + conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + Expect(err).ToNot(HaveOccurred()) + return pb.NewBackendClient(conn), func() { _ = conn.Close() } +} + +// modelPathOrSkip resolves a model file under ./test-models/ and Skip()s +// the current spec if it's missing. +func modelPathOrSkip(name string) string { + modelDir, err := filepath.Abs("test-models") + Expect(err).ToNot(HaveOccurred()) + modelPath := filepath.Join(modelDir, name) + if _, err := os.Stat(modelPath); err != nil { + Skip(fmt.Sprintf("model not present: %s (run test.sh first)", modelPath)) + } + return modelPath +} + +var _ = Describe("rfdetr-cpp backend", func() { + It("runs object detection against a known-good COCO image", func() { + modelPath := modelPathOrSkip("rfdetr-nano-q8_0.gguf") + imgB64 := loadTestImage() + + port := freePort() + cleanup := startBackend(port) + defer cleanup() + + client, closeConn := dialBackend(port) + defer closeConn() + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{ + Model: "rfdetr-nano-q8_0.gguf", + ModelFile: modelPath, + Threads: 2, + }) + Expect(err).ToNot(HaveOccurred(), "LoadModel") + Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage()) + + detResp, err := client.Detect(ctx, &pb.DetectOptions{ + Src: imgB64, + Threshold: 0.5, + }) + Expect(err).ToNot(HaveOccurred(), "Detect") + Expect(detResp.GetDetections()).ToNot(BeEmpty(), "no detections returned on a known-good COCO image") + + _, _ = fmt.Fprintf(GinkgoWriter, "detection OK: %d detections\n", len(detResp.GetDetections())) + for i, d := range detResp.GetDetections() { + Expect(d.GetClassName()).ToNot(BeEmpty(), "detection %d has empty class_name", i) + Expect(d.GetConfidence()).To(BeNumerically(">=", 
float32(0.5)), + "detection %d below threshold", i) + Expect(d.GetWidth()).To(BeNumerically(">", float32(0)), + "detection %d has non-positive width", i) + Expect(d.GetHeight()).To(BeNumerically(">", float32(0)), + "detection %d has non-positive height", i) + } + }) + + It("runs segmentation and returns PNG-encoded masks", func() { + modelPath := modelPathOrSkip("rfdetr-seg-nano-q8_0.gguf") + imgB64 := loadTestImage() + + port := freePort() + cleanup := startBackend(port) + defer cleanup() + + client, closeConn := dialBackend(port) + defer closeConn() + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{ + Model: "rfdetr-seg-nano-q8_0.gguf", + ModelFile: modelPath, + Threads: 2, + }) + Expect(err).ToNot(HaveOccurred(), "LoadModel") + Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage()) + + detResp, err := client.Detect(ctx, &pb.DetectOptions{ + Src: imgB64, + Threshold: 0.5, + }) + Expect(err).ToNot(HaveOccurred(), "Detect") + Expect(detResp.GetDetections()).ToNot(BeEmpty(), "no detections returned from segmentation model") + + haveMask := false + for i, d := range detResp.GetDetections() { + m := d.GetMask() + if len(m) == 0 { + continue + } + haveMask = true + // Verify PNG magic: 89 50 4E 47 ("\x89PNG"). 
+ Expect(len(m)).To(BeNumerically(">=", 4), "detection %d mask too short", i) + Expect([]byte{m[0], m[1], m[2], m[3]}).To(Equal([]byte{0x89, 'P', 'N', 'G'}), + "detection %d mask is not a PNG", i) + } + Expect(haveMask).To(BeTrue(), + "segmentation model returned %d detections but none carried a mask", + len(detResp.GetDetections())) + + _, _ = fmt.Fprintf(GinkgoWriter, "segmentation OK: %d detections, at least one with PNG mask\n", + len(detResp.GetDetections())) + }) +}) diff --git a/backend/go/rfdetr-cpp/package.sh b/backend/go/rfdetr-cpp/package.sh new file mode 100755 index 000000000000..9591b79dca97 --- /dev/null +++ b/backend/go/rfdetr-cpp/package.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture + +set -e + +CURDIR=$(dirname "$(realpath $0)") +REPO_ROOT="${CURDIR}/../../.." + +# Create lib directory +mkdir -p $CURDIR/package/lib + +cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/ +cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/ +cp -fv $CURDIR/run.sh $CURDIR/package/ + +# Detect architecture and copy appropriate libraries +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + # x86_64 architecture + echo "Detected x86_64 architecture, copying x86_64 libraries..." 
+ cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so + cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + # ARM64 architecture + echo "Detected ARM64 architecture, copying ARM64 libraries..." + cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so + cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ $(uname -s) = "Darwin" ]; then + echo "Detected Darwin" +else + echo "Error: Could not detect architecture" + exit 1 +fi + +# Package GPU libraries based on BUILD_TYPE +GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh" +if [ -f "$GPU_LIB_SCRIPT" ]; then + echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..." 
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
+echo "Packaging completed successfully"
+ls -liah $CURDIR/package/
+ls -liah $CURDIR/package/lib/
diff --git a/backend/go/rfdetr-cpp/run.sh b/backend/go/rfdetr-cpp/run.sh
new file mode 100755
index 000000000000..042904e45dd0
--- /dev/null
+++ b/backend/go/rfdetr-cpp/run.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -ex
+
+# Absolute directory of this script; quoted at every use so paths with spaces survive
+CURDIR=$(dirname "$(realpath "$0")")
+
+cd /
+
+echo "CPU info:"
+if [ "$(uname)" != "Darwin" ]; then
+    grep -e "model\sname" /proc/cpuinfo | head -1
+    grep -e "flags" /proc/cpuinfo | head -1
+fi
+
+LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
+
+if [ "$(uname)" != "Darwin" ]; then
+    if grep -q -e "\savx\s" /proc/cpuinfo ; then
+        echo "CPU: AVX found OK"
+        if [ -e "$CURDIR/librfdetrcpp-avx.so" ]; then
+            LIBRARY="$CURDIR/librfdetrcpp-avx.so"
+        fi
+    fi
+
+    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+        echo "CPU: AVX2 found OK"
+        if [ -e "$CURDIR/librfdetrcpp-avx2.so" ]; then
+            LIBRARY="$CURDIR/librfdetrcpp-avx2.so"
+        fi
+    fi
+
+    # Check avx 512
+    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
+        echo "CPU: AVX512F found OK"
+        if [ -e "$CURDIR/librfdetrcpp-avx512.so" ]; then
+            LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
+        fi
+    fi
+fi
+
+export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # no trailing ':' (empty entry = cwd)
+export RFDETR_LIBRARY="$LIBRARY"
+
+# If there is a lib/ld.so, use it
+if [ -f "$CURDIR/lib/ld.so" ]; then
+    echo "Using lib/ld.so"
+    echo "Using library: $LIBRARY"
+    exec "$CURDIR/lib/ld.so" "$CURDIR/rfdetr-cpp" "$@"
+fi
+
+echo "Using library: $LIBRARY"
+exec "$CURDIR/rfdetr-cpp" "$@"
diff --git a/backend/go/rfdetr-cpp/test.sh b/backend/go/rfdetr-cpp/test.sh
new file mode 100755
index 000000000000..dd30412005cb
--- /dev/null
+++ b/backend/go/rfdetr-cpp/test.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+echo "Running rfdetr-cpp backend tests..."
+
+# Test models from the mudler/rfdetr-cpp-* HuggingFace repos. Both the
+# detection (nano-q8_0, ~36 MB) and segmentation (seg-nano-q8_0, ~40 MB)
+# variants are downloaded so the Go smoke test exercises both code paths.
+RFDETR_MODEL_DIR="${RFDETR_MODEL_DIR:-$CURDIR/test-models}"
+
+RFDETR_DET_FILE="${RFDETR_DET_FILE:-rfdetr-nano-q8_0.gguf}"
+RFDETR_DET_URL="${RFDETR_DET_URL:-https://huggingface.co/mudler/rfdetr-cpp-nano/resolve/main/rfdetr-nano-q8_0.gguf}"
+
+RFDETR_SEG_FILE="${RFDETR_SEG_FILE:-rfdetr-seg-nano-q8_0.gguf}"
+RFDETR_SEG_URL="${RFDETR_SEG_URL:-https://huggingface.co/mudler/rfdetr-cpp-seg-nano/resolve/main/rfdetr-seg-nano-q8_0.gguf}"
+
+mkdir -p "$RFDETR_MODEL_DIR"
+
+if [ ! -f "$RFDETR_MODEL_DIR/$RFDETR_DET_FILE" ]; then
+    echo "Downloading rfdetr nano-q8_0 detection model..."
+    curl -fL -o "$RFDETR_MODEL_DIR/$RFDETR_DET_FILE" "$RFDETR_DET_URL" --progress-bar  # -f: fail on HTTP errors instead of saving the error page
+fi
+
+if [ ! -f "$RFDETR_MODEL_DIR/$RFDETR_SEG_FILE" ]; then
+    echo "Downloading rfdetr seg-nano-q8_0 segmentation model..."
+    curl -fL -o "$RFDETR_MODEL_DIR/$RFDETR_SEG_FILE" "$RFDETR_SEG_URL" --progress-bar
+fi
+
+# Use a real COCO test image from the upstream rf-detr.cpp repo (~46 KB).
+# A synthetic 64x64 red PNG was too synthetic to elicit detections from a
+# real model — the smoke test would always trivially pass with zero
+# detections.
+TEST_IMAGE_DIR="$CURDIR/test-data"
+TEST_IMAGE_FILE="$TEST_IMAGE_DIR/test.jpg"
+TEST_IMAGE_URL="${TEST_IMAGE_URL:-https://raw.githubusercontent.com/mudler/rf-detr.cpp/main/tests/fixtures/ci/test_image.jpg}"
+
+mkdir -p "$TEST_IMAGE_DIR"
+if [ ! -f "$TEST_IMAGE_FILE" ]; then
+    echo "Downloading COCO test image..."
+    curl -fL -o "$TEST_IMAGE_FILE" "$TEST_IMAGE_URL" --progress-bar
+fi
+
+echo "rfdetr-cpp test setup complete."
+echo " detection model: $RFDETR_MODEL_DIR/$RFDETR_DET_FILE" +echo " segmentation model: $RFDETR_MODEL_DIR/$RFDETR_SEG_FILE" +echo " test image: $TEST_IMAGE_FILE" + +# Run the Go smoke test: spawns the backend binary on a free port, calls +# LoadModel + Detect via gRPC for both detection and segmentation models. +echo "" +echo "Running Go smoke test..." +cd "$CURDIR" +go test -v -timeout 5m ./... diff --git a/backend/index.yaml b/backend/index.yaml index a63f054daab0..f205632d7dc5 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -253,6 +253,34 @@ nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp" intel: "intel-sycl-f32-sam3-cpp" vulkan: "vulkan-sam3-cpp" +- &rfdetrcpp + name: "rfdetr-cpp" + alias: "rfdetr-cpp" + license: apache-2.0 + description: | + Native RF-DETR object detection and instance segmentation in C/C++ + using GGML. Loads pre-built GGUF weights from the mudler/rfdetr-cpp-* + family (Nano/Small/Base/Medium/Large + SegNano/SegSmall/SegMedium) + and returns bounding boxes, class labels, confidence scores, and + (for segmentation variants) PNG-encoded per-detection masks. 
+ urls: + - https://github.com/mudler/rf-detr.cpp + tags: + - object-detection + - image-segmentation + - rfdetr + - gpu + - cpu + capabilities: + default: "cpu-rfdetr-cpp" + nvidia: "cuda12-rfdetr-cpp" + nvidia-cuda-12: "cuda12-rfdetr-cpp" + nvidia-cuda-13: "cuda13-rfdetr-cpp" + nvidia-l4t: "nvidia-l4t-arm64-rfdetr-cpp" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp" + intel: "intel-sycl-f32-rfdetr-cpp" + vulkan: "vulkan-rfdetr-cpp" - &vllm name: "vllm" license: apache-2.0 @@ -2349,6 +2377,99 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-sam3-cpp" mirrors: - localai/localai-backends:master-gpu-vulkan-sam3-cpp +## rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "rfdetr-cpp-development" + capabilities: + default: "cpu-rfdetr-cpp-development" + nvidia: "cuda12-rfdetr-cpp-development" + nvidia-cuda-12: "cuda12-rfdetr-cpp-development" + nvidia-cuda-13: "cuda13-rfdetr-cpp-development" + nvidia-l4t: "nvidia-l4t-arm64-rfdetr-cpp-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr-cpp-development" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp-development" + intel: "intel-sycl-f32-rfdetr-cpp-development" + vulkan: "vulkan-rfdetr-cpp-development" +- !!merge <<: *rfdetrcpp + name: "cpu-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-cpu-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cpu-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-cpu-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda12-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda12-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rfdetr-cpp" + mirrors: + - 
localai/localai-backends:master-gpu-nvidia-cuda-12-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda13-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda13-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "nvidia-l4t-arm64-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "nvidia-l4t-arm64-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-nvidia-l4t-arm64-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda13-nvidia-l4t-arm64-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "cuda13-nvidia-l4t-arm64-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "intel-sycl-f32-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-gpu-intel-sycl-f32-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "intel-sycl-f32-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-gpu-intel-sycl-f32-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "intel-sycl-f16-rfdetr-cpp" + uri: 
"quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-gpu-intel-sycl-f16-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "intel-sycl-f16-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-gpu-intel-sycl-f16-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "vulkan-rfdetr-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-rfdetr-cpp" + mirrors: + - localai/localai-backends:latest-gpu-vulkan-rfdetr-cpp +- !!merge <<: *rfdetrcpp + name: "vulkan-rfdetr-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-rfdetr-cpp" + mirrors: + - localai/localai-backends:master-gpu-vulkan-rfdetr-cpp ## Rerankers - !!merge <<: *rerankers name: "rerankers-development" diff --git a/core/gallery/importers/rfdetr.go b/core/gallery/importers/rfdetr.go index 5a086c7f8554..fd0e30581648 100644 --- a/core/gallery/importers/rfdetr.go +++ b/core/gallery/importers/rfdetr.go @@ -31,6 +31,29 @@ func repoLooksLikeRFDetr(repo string) bool { return strings.Contains(lower, "rf-detr") || strings.Contains(lower, "rfdetr") } +// repoHasGGUF inspects the HuggingFace file list (when available) to decide +// whether the repo ships RF-DETR weights in ggml/GGUF form — the native +// rfdetr-cpp backend's input format. Mudler's rfdetr-cpp-* repos +// (mudler/rfdetr-cpp-nano, mudler/rfdetr-cpp-base, ...) match. 
+func repoHasGGUF(details Details) bool {
+	if details.HuggingFace == nil { // no HF metadata, so there is no file list to inspect
+		return false
+	}
+	for _, f := range details.HuggingFace.Files {
+		if strings.HasSuffix(strings.ToLower(f.Path), ".gguf") { // extension match is case-insensitive
+			return true
+		}
+	}
+	return false
+}
+
+// repoLooksLikeRFDetrCpp reports whether repo contains, case-insensitively, an rf-detr.cpp naming token.
+func repoLooksLikeRFDetrCpp(repo string) bool {
+	lower := strings.ToLower(repo)
+	return strings.Contains(lower, "rfdetr-cpp") || strings.Contains(lower, "rf-detr-cpp") ||
+		strings.Contains(lower, "rfdetr.cpp") || strings.Contains(lower, "rf-detr.cpp")
+}
+
 func (i *RFDetrImporter) Match(details Details) bool {
 	preferences, err := details.Preferences.MarshalJSON()
 	if err != nil {
@@ -43,7 +66,7 @@ func (i *RFDetrImporter) Match(details Details) bool {
 		}
 	}
 
-	if b, ok := preferencesMap["backend"].(string); ok && b == "rfdetr" {
+	if b, ok := preferencesMap["backend"].(string); ok && (b == "rfdetr" || b == "rfdetr-cpp") {
 		return true
 	}
 
@@ -99,10 +122,28 @@ func (i *RFDetrImporter) Import(details Details) (gallery.ModelConfig, error) {
 		model = owner + "/" + repo
 	}
 
+	// Route GGUF-bearing repos (mudler/rfdetr-cpp-*) to the native
+	// rfdetr-cpp backend; HF transformer repos keep the Python rfdetr
+	// backend. Explicit preferences.backend overrides the heuristic.
+ backend := "rfdetr" + if b, ok := preferencesMap["backend"].(string); ok && b != "" { + backend = b + } else if repoHasGGUF(details) { + backend = "rfdetr-cpp" + } else if details.HuggingFace != nil { + repoName := details.HuggingFace.ModelID + if idx := strings.Index(repoName, "/"); idx >= 0 { + repoName = repoName[idx+1:] + } + if repoLooksLikeRFDetrCpp(repoName) { + backend = "rfdetr-cpp" + } + } + modelConfig := config.ModelConfig{ Name: name, Description: description, - Backend: "rfdetr", + Backend: backend, KnownUsecaseStrings: []string{"detection"}, PredictionOptions: schema.PredictionOptions{ BasicModelRequest: schema.BasicModelRequest{Model: model}, diff --git a/core/gallery/importers/rfdetr_test.go b/core/gallery/importers/rfdetr_test.go index 72f871e16f1f..2274b7dddaed 100644 --- a/core/gallery/importers/rfdetr_test.go +++ b/core/gallery/importers/rfdetr_test.go @@ -129,4 +129,125 @@ var _ = Describe("RFDetrImporter", func() { Expect(modelConfig.Description).To(Equal("Custom")) }) }) + + // Table-driven coverage of the GGUF auto-routing path between the + // Python rfdetr backend (HF transformer repos) and the native + // rfdetr-cpp backend (GGUF repos like mudler/rfdetr-cpp-*). + // + // Cases are kept offline-deterministic by injecting Details directly + // rather than going through DiscoverModelConfig (which would hit live HF). + // The live HF cross-check lives in its own Context below. 
+ Context("GGUF auto-routing (offline)", func() { + hfFile := func(path string) hfapi.ModelFile { + return hfapi.ModelFile{Path: path} + } + + type tc struct { + name string + uri string + modelID string + files []hfapi.ModelFile + prefs string + expectBackend string // expected `backend:` line content + rejectBackends []string + } + + entries := []tc{ + { + name: "GGUF repo with rfdetr-cpp prefix routes to rfdetr-cpp", + uri: "https://huggingface.co/mudler/rfdetr-cpp-nano", + modelID: "mudler/rfdetr-cpp-nano", + files: []hfapi.ModelFile{hfFile("rfdetr-nano-q8_0.gguf"), hfFile("README.md")}, + prefs: "", + expectBackend: "backend: rfdetr-cpp", + }, + { + name: "GGUF presence alone routes to rfdetr-cpp even when repo name lacks -cpp", + uri: "https://huggingface.co/some/rf-detr-ggml", + modelID: "some/rf-detr-ggml", + files: []hfapi.ModelFile{hfFile("rfdetr-base-f16.gguf")}, + prefs: "", + expectBackend: "backend: rfdetr-cpp", + }, + { + name: "transformer repo without GGUF stays on the Python rfdetr backend", + uri: "https://huggingface.co/roboflow/rf-detr-base", + modelID: "roboflow/rf-detr-base", + files: []hfapi.ModelFile{hfFile("config.json"), hfFile("pytorch_model.bin")}, + prefs: "", + expectBackend: "backend: rfdetr\n", + rejectBackends: []string{"backend: rfdetr-cpp"}, + }, + { + name: "explicit preferences.backend=rfdetr overrides GGUF auto-detect", + uri: "https://huggingface.co/mudler/rfdetr-cpp-nano", + modelID: "mudler/rfdetr-cpp-nano", + files: []hfapi.ModelFile{hfFile("rfdetr-nano-q8_0.gguf")}, + prefs: `{"backend": "rfdetr"}`, + expectBackend: "backend: rfdetr\n", + rejectBackends: []string{"backend: rfdetr-cpp"}, + }, + { + name: "explicit preferences.backend=rfdetr-cpp wins on non-GGUF transformer repo", + uri: "https://huggingface.co/roboflow/rf-detr-base", + modelID: "roboflow/rf-detr-base", + files: []hfapi.ModelFile{hfFile("config.json")}, + prefs: `{"backend": "rfdetr-cpp"}`, + expectBackend: "backend: rfdetr-cpp", + }, + { + name: "repo name 
with rfdetr.cpp pattern routes to rfdetr-cpp even without HF file list", + uri: "https://huggingface.co/some/rfdetr.cpp-bundle", + modelID: "some/rfdetr.cpp-bundle", + files: nil, + prefs: "", + expectBackend: "backend: rfdetr-cpp", + }, + } + + for _, e := range entries { + e := e // capture for closure + It(e.name, func() { + imp := &importers.RFDetrImporter{} + details := importers.Details{ + URI: e.uri, + HuggingFace: &hfapi.ModelDetails{ + ModelID: e.modelID, + Files: e.files, + }, + } + if e.prefs != "" { + details.Preferences = json.RawMessage(e.prefs) + } + + // Match must always be true for these fixtures — they're + // either preference-driven or have an rfdetr/rf-detr token. + Expect(imp.Match(details)).To(BeTrue(), fmt.Sprintf("Match should fire for %+v", details)) + + modelConfig, err := imp.Import(details) + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Import error: %v", err)) + Expect(modelConfig.ConfigFile).To(ContainSubstring(e.expectBackend), + fmt.Sprintf("Model config: %+v", modelConfig)) + for _, rej := range e.rejectBackends { + Expect(modelConfig.ConfigFile).ToNot(ContainSubstring(rej), + fmt.Sprintf("did not expect %q in: %+v", rej, modelConfig)) + } + }) + } + }) + + // Live HF cross-check: the canonical native GGUF repo for the + // rfdetr-cpp backend. Marked broad — we only assert the routing + // decision, not file lists (upstream may add quants over time). 
+ Context("detection from HuggingFace: mudler/rfdetr-cpp-nano", func() { + It("auto-routes to the native rfdetr-cpp backend without preferences", func() { + uri := "https://huggingface.co/mudler/rfdetr-cpp-nano" + modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`)) + + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: rfdetr-cpp"), + fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mudler/rfdetr-cpp-nano")) + }) + }) }) diff --git a/docs/content/features/object-detection.md b/docs/content/features/object-detection.md index 1acdc3a16696..0c2b9e68a6ec 100644 --- a/docs/content/features/object-detection.md +++ b/docs/content/features/object-detection.md @@ -5,7 +5,7 @@ weight = 13 url = "/features/object-detection/" +++ -LocalAI supports object detection and image segmentation through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Available backends include [RF-DETR](https://github.com/roboflow/rf-detr) for object detection and [sam3.cpp](https://github.com/PABannier/sam3.cpp) for image segmentation (SAM 3/2/EdgeTAM). +LocalAI supports object detection and image segmentation through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Available backends include [RF-DETR](https://github.com/roboflow/rf-detr) (Python) and [rf-detr.cpp](https://github.com/mudler/rf-detr.cpp) (native C++/ggml) for object detection and segmentation, and [sam3.cpp](https://github.com/PABannier/sam3.cpp) for image segmentation (SAM 3/2/EdgeTAM). 
For detecting **faces** specifically, see the dedicated [Face Recognition](/features/face-recognition/) feature — its @@ -135,6 +135,74 @@ Currently, the following model is available in the [Model Gallery]({{%relref "fe You can browse and install this model through the LocalAI web interface or using the command line. +### RF-DETR Native Backend (rfdetr-cpp) + +The `rfdetr-cpp` backend is a native C++/ggml implementation of RF-DETR +inference based on [rf-detr.cpp](https://github.com/mudler/rf-detr.cpp). It +runs as a Go gRPC service that dlopens a per-CPU-variant shared library, so +there is no Python runtime on the inference path — startup is fast and the +binary is self-contained. + +Compared to the Python `rfdetr` backend, the native backend: + +- Has no Python or PyTorch dependency at inference time +- Loads GGUF models in F32, F16, or quantized (Q8_0, Q4_K) form for a smaller footprint +- Supports both detection and segmentation variants of RF-DETR +- Returns segmentation masks as PNG bytes in `Detection.mask` + +#### Setup + +1. **Install the backend** + + ```bash + local-ai backends install rfdetr-cpp + ``` + +2. **Using the Model Gallery (Recommended)** + + The gallery ships ready-to-run entries for every published variant: + + ```bash + # Detection variants + local-ai run rfdetr-cpp-nano + local-ai run rfdetr-cpp-small + local-ai run rfdetr-cpp-base + local-ai run rfdetr-cpp-medium + local-ai run rfdetr-cpp-large + + # Segmentation variants (return per-instance PNG masks) + local-ai run rfdetr-cpp-seg-nano + local-ai run rfdetr-cpp-seg-small + local-ai run rfdetr-cpp-seg-medium + local-ai run rfdetr-cpp-seg-large + local-ai run rfdetr-cpp-seg-xlarge + local-ai run rfdetr-cpp-seg-2xlarge + ``` + +3. 
**Manual Configuration** + + ```yaml + name: rfdetr-cpp-seg-nano + backend: rfdetr-cpp + parameters: + model: rfdetr-seg-nano-f16.gguf + threads: 4 + known_usecases: + - detection + ``` + + Pre-quantized GGUFs are published under + [`mudler/rfdetr-cpp-*`](https://huggingface.co/mudler?search_models=rfdetr-cpp) + on Hugging Face. Each repo carries the F32/F16/Q8_0/Q4_K quants — F16 is + the recommended default (matches F32 accuracy, ~1.86x smaller). + +#### Segmentation Output + +When running a segmentation model (any `rfdetr-cpp-seg-*` variant), each +`Detection` in the response carries a `mask` field with a base64-encoded +PNG of the per-instance binary mask. The mask is sized to the original +image resolution and aligns with the corresponding bounding box. + ### SAM3 Backend (sam3-cpp) The sam3-cpp backend provides image segmentation using [sam3.cpp](https://github.com/PABannier/sam3.cpp), a portable C++ implementation of Meta's Segment Anything Model. It supports multiple model architectures: @@ -261,7 +329,8 @@ local-ai run --debug rfdetr-base LocalAI includes a dedicated **object-detection** category for models and backends that specialize in identifying and locating objects within images. This category currently includes: -- **RF-DETR**: Real-time transformer-based object detection +- **RF-DETR**: Real-time transformer-based object detection (Python backend) +- **rfdetr-cpp**: Native C++/ggml RF-DETR for detection + segmentation - **sam3-cpp**: SAM 3/2/EdgeTAM image segmentation Additional object detection models and backends will be added to this category in the future. You can filter models by the `object-detection` tag in the model gallery to find all available object detection models. 
diff --git a/gallery/index.yaml b/gallery/index.yaml index 96ca4e77377a..adbad8f03b19 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6118,6 +6118,317 @@ - detection parameters: model: rfdetr-base +- name: rfdetr-cpp-nano + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-nano + description: | + RF-DETR Nano object detection model, served via the native rfdetr.cpp backend (ggml + purego, no Python). + Q8_0 quantization is the recommended default for CPU: same accuracy as F16/F32, ~20MB on disk, fastest CPU latency. + Pure C++/ggml runtime; no Python dependencies. Drop-in for the /v1/detection endpoint. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-nano-q8_0.gguf + files: + - filename: rfdetr-nano-q8_0.gguf + uri: huggingface://mudler/rfdetr-cpp-nano/rfdetr-nano-q8_0.gguf +- name: rfdetr-cpp-base + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-base + description: | + RF-DETR Base object detection model, served via the native rfdetr.cpp backend. + F16 quantization is recommended on CPU: identical accuracy to F32, half the size, fastest. 
+ license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-base-f16.gguf + files: + - filename: rfdetr-base-f16.gguf + uri: huggingface://mudler/rfdetr-cpp-base/rfdetr-base-f16.gguf +- name: rfdetr-cpp-small + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-small + description: | + RF-DETR Small object detection model (DINOv2-small backbone, 512px input, 3 decoder layers), served + via the native rfdetr.cpp backend (ggml + purego, no Python). A step up from Nano in accuracy while + staying lightweight on CPU. F16 quantization is the recommended default: identical accuracy to F32 + at roughly half the size. Drop-in for the /v1/detection endpoint. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-small-f16.gguf + files: + - filename: rfdetr-small-f16.gguf + sha256: 5365264a976bb99ab31f735f43326e50b0804a60cd1709abe8c1c95114c4d79d + uri: huggingface://mudler/rfdetr-cpp-small/rfdetr-small-f16.gguf +- name: rfdetr-cpp-medium + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-medium + description: | + RF-DETR Medium object detection model (DINOv2-small backbone, 576px input, 4 decoder layers), served + via the native rfdetr.cpp backend. Balanced detection quality vs. CPU latency — recommended when + Base is not accurate enough but Large is too slow. F16 quantization is the recommended default: + identical accuracy to F32, half the size. Drop-in for the /v1/detection endpoint. 
+ license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-medium-f16.gguf + files: + - filename: rfdetr-medium-f16.gguf + sha256: 685b8f50890f099bbc603454309b2d5f1d471541420b95c20c6ed296aec1e7ae + uri: huggingface://mudler/rfdetr-cpp-medium/rfdetr-medium-f16.gguf +- name: rfdetr-cpp-large + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-large + description: | + RF-DETR Large object detection model (DINOv2-small backbone, 704px input, 4 decoder layers), served + via the native rfdetr.cpp backend. Highest-accuracy detection variant — best for offline workflows + and high-resolution inputs where CPU latency is secondary to recall. F16 quantization is the + recommended default: identical accuracy to F32, half the size. Drop-in for the /v1/detection endpoint. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-large-f16.gguf + files: + - filename: rfdetr-large-f16.gguf + sha256: 819f1abc72f746a686722eacc9c4db992b7ca853b26e390ab0a66ca6ea70060a + uri: huggingface://mudler/rfdetr-cpp-large/rfdetr-large-f16.gguf +- name: rfdetr-cpp-seg-nano + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-nano + description: | + RF-DETR Seg-Nano instance segmentation model (DINOv2-small backbone, 312px input, 4 decoder layers, + 100 queries), served via the native rfdetr.cpp backend. Smallest segmentation variant — fastest CPU + latency, ideal for edge deployment. 
Returns both bounding boxes and per-instance masks via the + /v1/detection endpoint. F16 quantization is the recommended default: identical accuracy to F32, + half the size. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-nano-f16.gguf + files: + - filename: rfdetr-seg-nano-f16.gguf + sha256: 9f9a0ab547743992b6c664d41ee1a6afcd66b21b04609a68f76c0eec88648c2b + uri: huggingface://mudler/rfdetr-cpp-seg-nano/rfdetr-seg-nano-f16.gguf +- name: rfdetr-cpp-seg-small + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-small + description: | + RF-DETR Seg-Small instance segmentation model (DINOv2-small backbone, 384px input, 4 decoder layers, + 100 queries), served via the native rfdetr.cpp backend. A step up from Seg-Nano in mask quality while + staying CPU-friendly. Returns both bounding boxes and per-instance masks via the /v1/detection + endpoint. F16 quantization is the recommended default: identical accuracy to F32, half the size.
+ license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-small-f16.gguf + files: + - filename: rfdetr-seg-small-f16.gguf + sha256: 1b569a182aea941ec645a1923c1e8ad9db05e006db36136da9f148d1ec066670 + uri: huggingface://mudler/rfdetr-cpp-seg-small/rfdetr-seg-small-f16.gguf +- name: rfdetr-cpp-seg-medium + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-medium + description: | + RF-DETR Seg-Medium instance segmentation model (DINOv2-small backbone, 432px input, 5 decoder layers, + 200 queries), served via the native rfdetr.cpp backend. Balanced segmentation quality vs. CPU latency + — recommended for everyday segmentation workloads. Returns both bounding boxes and per-instance masks + via the /v1/detection endpoint. F16 quantization is the recommended default. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-medium-f16.gguf + files: + - filename: rfdetr-seg-medium-f16.gguf + sha256: 885d85ed6935495fc50ff464e06b6ea3bd8e8386865852d68a8be0f649d65afe + uri: huggingface://mudler/rfdetr-cpp-seg-medium/rfdetr-seg-medium-f16.gguf +- name: rfdetr-cpp-seg-large + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-large + description: | + RF-DETR Seg-Large instance segmentation model (DINOv2-small backbone, 504px input, 5 decoder layers, + 200 queries), served via the native rfdetr.cpp backend. 
Higher-resolution input than Seg-Medium for + sharper mask boundaries. Returns both bounding boxes and per-instance masks via the /v1/detection + endpoint. F16 quantization is the recommended default: identical accuracy to F32, half the size. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-large-f16.gguf + files: + - filename: rfdetr-seg-large-f16.gguf + sha256: 90423066d0791b4ae249f3986cce1f095a1e4090bf46800bf7f9e371ea80d559 + uri: huggingface://mudler/rfdetr-cpp-seg-large/rfdetr-seg-large-f16.gguf +- name: rfdetr-cpp-seg-xlarge + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-xlarge + description: | + RF-DETR Seg-XLarge instance segmentation model (DINOv2-small backbone, 624px input, 6 decoder layers, + 300 queries), served via the native rfdetr.cpp backend. High-capacity segmentation variant with more + queries and a deeper decoder — best for dense scenes with many instances. Returns both bounding boxes + and per-instance masks via the /v1/detection endpoint. F16 quantization is the recommended default.
+ license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-xlarge-f16.gguf + files: + - filename: rfdetr-seg-xlarge-f16.gguf + sha256: 0b82de4a6e65a40bc930979a1a4281cb24de35203d30eeefd797c858101a7bec + uri: huggingface://mudler/rfdetr-cpp-seg-xlarge/rfdetr-seg-xlarge-f16.gguf +- name: rfdetr-cpp-seg-2xlarge + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/rf-detr.cpp + - https://huggingface.co/mudler/rfdetr-cpp-seg-2xlarge + description: | + RF-DETR Seg-2XLarge instance segmentation model (DINOv2-small backbone, 768px input, 6 decoder layers, + 300 queries), served via the native rfdetr.cpp backend. Highest-accuracy segmentation variant — best + for offline workflows and high-resolution inputs where CPU latency is secondary to mask quality. + Returns both bounding boxes and per-instance masks via the /v1/detection endpoint. F16 quantization + is the recommended default: identical accuracy to F32, half the size. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - image-segmentation + - rfdetr + - native + - cpp + - cpu + overrides: + backend: rfdetr-cpp + known_usecases: + - detection + parameters: + model: rfdetr-seg-2xlarge-f16.gguf + files: + - filename: rfdetr-seg-2xlarge-f16.gguf + sha256: 7f957997db23e844194ea8266a95b4adc3deb6d0b71c0924922b20fbdeafa299 + uri: huggingface://mudler/rfdetr-cpp-seg-2xlarge/rfdetr-seg-2xlarge-f16.gguf - name: edgetam url: github:mudler/LocalAI/gallery/virtual.yaml@master urls: