Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions .github/backend-matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: CUDA 12.8 (cublas) image for linux/amd64 on the ubuntu:24.04 base
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
Expand Down Expand Up @@ -1491,6 +1504,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: CUDA 13.0 (cublas) image for linux/amd64 on the ubuntu:24.04 base
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
Expand All @@ -1504,6 +1530,19 @@ include:
backend: "sam3-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
# rfdetr-cpp: CUDA 13.0 (cublas) image for linux/arm64, published under the
# nvidia-l4t cuda-13 tag suffix and built on the ubuntu-24.04-arm runner
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-rfdetr-cpp'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
Expand Down Expand Up @@ -2635,6 +2674,74 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp
# CPU-only image: empty build-type and CUDA versions, linux/amd64 only
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: Intel SYCL (f32) image, built on the Intel oneAPI basekit base image
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: Intel SYCL (f16) image, built on the Intel oneAPI basekit base image
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: Vulkan image for linux/amd64 (platform-tag 'amd64')
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-rfdetr-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# rfdetr-cpp: Vulkan image for linux/arm64 (platform-tag 'arm64'), built on the
# arm runner. It uses the same '-gpu-vulkan-rfdetr-cpp' tag suffix as the amd64
# Vulkan entry; presumably the two are combined via platform-tag — TODO confirm.
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-rfdetr-cpp'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
Expand Down Expand Up @@ -2715,6 +2822,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
# rfdetr-cpp: nvidia-l4t linux/arm64 image on the l4t-jetpack r36.4.0 base
# (cublas, CUDA 12.0, ubuntu-version 2204)
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-rfdetr-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "rfdetr-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
# whisper
- build-type: ''
cuda-major-version: ""
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/bump_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ jobs:
variable: "SAM3_VERSION"
branch: "main"
file: "backend/go/sam3-cpp/Makefile"
# rfdetr-cpp backend: follows the main branch of mudler/rf-detr.cpp and targets
# the RFDETR_VERSION variable in the backend's Makefile
- repository: "mudler/rf-detr.cpp"
variable: "RFDETR_VERSION"
branch: "main"
file: "backend/go/rfdetr-cpp/Makefile"
- repository: "predict-woo/qwen3-tts.cpp"
variable: "QWEN3TTS_CPP_VERSION"
branch: "main"
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
sglang: ${{ steps.detect.outputs.sglang }}
acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }}
qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }}
rfdetr-cpp: ${{ steps.detect.outputs.rfdetr-cpp }}
vibevoice-cpp: ${{ steps.detect.outputs.vibevoice-cpp }}
localvqe: ${{ steps.detect.outputs.localvqe }}
voxtral: ${{ steps.detect.outputs.voxtral }}
Expand Down Expand Up @@ -843,6 +844,42 @@ jobs:
- name: Test qwen3-tts-cpp
run: |
make --jobs=5 --output-sync=target -C backend/go/qwen3-tts-cpp test
# Per-backend smoke for rfdetr-cpp: builds the .so + Go binary and runs
# `make -C backend/go/rfdetr-cpp test`. test.sh fetches the small (~20 MB)
# rfdetr-nano-q8_0 GGUF from the published mudler/rfdetr-cpp-nano HF repo
# via curl and synthesises a tiny PNG to exercise the wire protocol.
tests-rfdetr-cpp:
needs: detect-changes
# Run only when detect-changes reports rfdetr-cpp as changed, or when the
# run-all output is set.
if: needs.detect-changes.outputs.rfdetr-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
with:
# Check out git submodules together with the repository.
submodules: true
# Toolchain and libraries installed from apt for the native build.
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake curl libopenblas-dev
- name: Setup Go
uses: actions/setup-go@v5
- name: Display Go version
run: go version
# Installs protoc v26.1 into /usr/local/bin plus the pinned protoc-gen-go and
# protoc-gen-go-grpc plugins, then runs `make protogen-go` with $HOME/go/bin
# on PATH so the freshly installed plugins are found.
- name: Proto Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build rfdetr-cpp
run: |
make --jobs=5 --output-sync=target -C backend/go/rfdetr-cpp
- name: Test rfdetr-cpp
run: |
make --jobs=5 --output-sync=target -C backend/go/rfdetr-cpp test
# Per-backend smoke for vibevoice-cpp: builds the .so + Go binary and
# runs `make -C backend/go/vibevoice-cpp test`. test.sh auto-downloads
# the published mudler/vibevoice.cpp-models bundle (TTS Q8_0 + ASR Q4_K
Expand Down
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio

GOCMD=go
GOTEST=$(GOCMD) test
Expand Down Expand Up @@ -481,6 +481,7 @@ prepare-test-extra: protogen-python
$(MAKE) -C backend/python/insightface
$(MAKE) -C backend/python/speaker-recognition
$(MAKE) -C backend/rust/kokoros kokoros-grpc
$(MAKE) -C backend/go/rfdetr-cpp

test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test
Expand All @@ -507,6 +508,7 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/insightface test
$(MAKE) -C backend/python/speaker-recognition test
$(MAKE) -C backend/rust/kokoros test
$(MAKE) -C backend/go/rfdetr-cpp test

##
## End-to-end gRPC tests that exercise a built backend container image.
Expand Down Expand Up @@ -1119,6 +1121,7 @@ BACKEND_KOKOROS = kokoros|rust|.|false|true

# C++ backends (Go wrapper with purego)
# Each value is a '|'-separated tuple. Judging by the docker-build-backend
# usage note in this Makefile, the fields are presumably
#   name|dockerfile-type|build-context|progress-flag|needs-backend-arg
# — TODO confirm against generate-docker-build-target.
BACKEND_SAM3_CPP = sam3-cpp|golang|.|false|true
BACKEND_RFDETR_CPP = rfdetr-cpp|golang|.|false|true

# Helper function to build docker image for a backend
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
Expand Down Expand Up @@ -1198,13 +1201,14 @@ $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION)))
$(eval $(call generate-docker-build-target,$(BACKEND_TINYGRAD)))
$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
$(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_SHERPA_ONNX)))

# Pattern rule for docker-save targets
# `make docker-save-<name>` runs `docker save` on the local-ai-backend:<name>
# image ($* is the pattern stem) and writes it to backend-images/<name>.tar.
# The backend-images prerequisite is declared outside the lines shown here.
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar

docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-rfdetr-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy

########################################################
### Mock Backend for E2E Tests
Expand Down
7 changes: 7 additions & 0 deletions backend/go/rfdetr-cpp/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Upstream checkout consumed by CMakeLists.txt (sources/rt-detr.cpp)
sources/
# CMake build trees
build*/
# Packaging output (presumably produced by the backend's package step)
package/
# Built loadable module (CMake target `rfdetrcpp`)
librfdetrcpp*.so
# Built backend binary (presumably the Go wrapper)
rfdetr-cpp
# Test fixtures (presumably fetched or generated by test.sh)
test-models/
test-data/
# No-op shim that CMakeLists.txt writes into the source tree at configure time
scripts/apply_ggml_patches.sh
79 changes: 79 additions & 0 deletions backend/go/rfdetr-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
cmake_minimum_required(VERSION 3.18)
project(librfdetrcpp LANGUAGES C CXX)

# Toolchain defaults for everything configured from this file, including the
# targets pulled in later via add_subdirectory(): C++17 is mandatory and all
# objects are compiled as position-independent code.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# ggml and rfdetr are linked in as static archives. The resulting .so then
# carries no runtime dependency on separate ggml/rfdetr shared objects; it
# only needs libc/libstdc++/libgomp, which the LocalAI package step bundles
# into the docker image.
set(BUILD_SHARED_LIBS
    OFF
    CACHE BOOL "Build static libraries" FORCE)

# Upstream rfdetr.cpp switches: build the library as a static archive and
# leave out the CLI and the test suite.
set(RFDETR_SHARED
    OFF
    CACHE BOOL "Build rfdetr as static lib" FORCE)
set(RFDETR_BUILD_CLI
    OFF
    CACHE BOOL "Disable rfdetr CLI" FORCE)
set(RFDETR_BUILD_TESTS
    OFF
    CACHE BOOL "Disable rfdetr tests" FORCE)

# The upstream project checked out under sources/rt-detr.cpp runs
#   bash ${CMAKE_SOURCE_DIR}/scripts/apply_ggml_patches.sh
# from its top-level CMakeLists to apply its in-tree ggml patches before
# descending into the ggml submodule. When that project is pulled in with
# add_subdirectory() from here, CMAKE_SOURCE_DIR points at *this* directory
# rather than the upstream checkout, so the path resolves to the wrong place.
#
# Work around it by running the real script here, up front. The script is
# expected to be idempotent (re-running it once the patches are applied is a
# no-op), so executing it on every configure is safe.
set(RFDETR_CPP_SRC "${CMAKE_CURRENT_SOURCE_DIR}/sources/rt-detr.cpp")
set(_rfdetr_patch_script "${RFDETR_CPP_SRC}/scripts/apply_ggml_patches.sh")

if(EXISTS "${_rfdetr_patch_script}")
  execute_process(
    COMMAND bash "${_rfdetr_patch_script}"
    RESULT_VARIABLE _rfdetr_patch_result
    OUTPUT_VARIABLE _rfdetr_patch_output
    ERROR_VARIABLE _rfdetr_patch_error
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE)

  # RESULT_VARIABLE holds the exit code, or an error string when bash could
  # not be started at all; anything other than 0 aborts the configure.
  if(NOT _rfdetr_patch_result EQUAL 0)
    message(FATAL_ERROR
      "Failed to apply ggml patches (exit ${_rfdetr_patch_result}):\n"
      "stdout:\n${_rfdetr_patch_output}\n"
      "stderr:\n${_rfdetr_patch_error}")
  endif()

  # Relay what the script printed, skipping empty status lines. stderr is
  # relayed as well so warnings from a successful run are not lost.
  if(NOT "${_rfdetr_patch_output}" STREQUAL "")
    message(STATUS "${_rfdetr_patch_output}")
  endif()
  if(NOT "${_rfdetr_patch_error}" STREQUAL "")
    message(STATUS "apply_ggml_patches.sh stderr:\n${_rfdetr_patch_error}")
  endif()
else()
  # Do not skip silently: this file also stages a no-op shim under scripts/
  # that satisfies the upstream patch hook, so without this diagnostic ggml
  # could be built unpatched with no indication of it.
  message(WARNING
    "rfdetr-cpp: ${_rfdetr_patch_script} not found; "
    "ggml patches were not applied by this project.")
endif()

# The upstream CMakeLists still calls
#   bash ${CMAKE_SOURCE_DIR}/scripts/apply_ggml_patches.sh
# during its own configure step. Place a do-nothing script at that location
# inside this project's source directory so the call succeeds. The real
# patches have already been applied earlier in this file; the shim only
# satisfies the path lookup.
# NOTE(review): the shim lands where upstream looks only while this directory
# is the top-level project (CMAKE_SOURCE_DIR == CMAKE_CURRENT_SOURCE_DIR).
set(_rfdetr_shim_dir "${CMAKE_CURRENT_SOURCE_DIR}/scripts")
set(_rfdetr_shim "${_rfdetr_shim_dir}/apply_ggml_patches.sh")

file(MAKE_DIRECTORY "${_rfdetr_shim_dir}")
file(WRITE "${_rfdetr_shim}" [[
#!/usr/bin/env bash
# Shim - patches were already applied by the parent CMakeLists.
exit 0
]])
execute_process(COMMAND chmod +x "${_rfdetr_shim}")

# Configure the upstream checkout. It is expected to provide the `rfdetr` and
# `ggml` targets that the module below links against.
add_subdirectory(./sources/rt-detr.cpp)

# rfdetrcpp: the loadable module produced by this project.
#
# The upstream C-API translation unit (src/rfdetr_capi.cpp) is compiled
# directly into the module, so the C-API symbols are defined by the module's
# own object file. No whole-archive linker flag is used; `rfdetr` and `ggml`
# are linked as ordinary PRIVATE dependencies.
# NOTE(review): upstream reportedly also builds rfdetr_capi.cpp into the
# rfdetr library itself; that is not visible from this file - confirm the
# second copy is never pulled in from the static archive.
add_library(rfdetrcpp MODULE
  sources/rt-detr.cpp/src/rfdetr_capi.cpp
)

# Build as C++17 and emit the module at the top of the build tree.
set_target_properties(rfdetrcpp PROPERTIES
  CXX_STANDARD 17
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
)

# Header search paths inside the upstream checkout: public headers, internal
# sources, and the bundled stb directory.
target_include_directories(rfdetrcpp PRIVATE
  sources/rt-detr.cpp/include
  sources/rt-detr.cpp/src
  sources/rt-detr.cpp/third_party/stb
)

target_link_libraries(rfdetrcpp PRIVATE rfdetr ggml)

# GCC releases older than 9 ship std::filesystem in a separate library, so it
# has to be linked explicitly there.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
    target_link_libraries(rfdetrcpp PRIVATE stdc++fs)
  endif()
endif()
Loading
Loading