19 changes: 19 additions & 0 deletions backend/cpp/llama-cpp/Makefile
@@ -8,6 +8,7 @@ NATIVE?=false
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
JOBS?=$(shell nproc)
ARCH?=$(shell uname -m)

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
@@ -106,21 +107,39 @@ llama-cpp-avx: llama.cpp
	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
	$(info ${GREEN}I llama-cpp build info:avx${RESET})
ifeq ($(OS),Darwin)
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
endif
	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx

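A note on the conditional above: $(filter $(ARCH),aarch64 arm64) expands to $(ARCH) only when it matches one of the listed words, so that ifeq branch fires for both the Linux (aarch64) and macOS (arm64) spellings of the ARM architecture; everything else falls through to the x86 branch, which appends -mno-bmi2. A minimal standalone sketch of the idiom (GNU Make assumed; the check-arch target is hypothetical, and OS is expected to be set elsewhere, e.g. OS := $(shell uname -s), as in this Makefile):

ARCH ?= $(shell uname -m)

check-arch:  # recipe lines use tab indentation
ifeq ($(OS),Darwin)
	@echo "macOS: no x86-specific flags needed"
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
	@echo "ARM ($(ARCH)): skipping -mno-bmi/-mno-bmi2"
else
	@echo "x86 ($(ARCH)): disabling BMI/BMI2 codegen"
endif
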
llama-cpp-fallback: llama.cpp
	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
ifeq ($(OS),Darwin)
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
endif
	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback

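The -mno-bmi -mno-bmi2 flags keep the fallback binary runnable on older x86_64 CPUs that lack BMI/BMI2. A rough way to verify the result is sketched below (assumes an x86_64 Linux host with binutils on the path; the mnemonic list covers the common BMI1/BMI2 instructions and is not exhaustive, so treat a non-zero count as a prompt to inspect rather than proof of breakage):

objdump -d llama-cpp-fallback \
  | grep -cE '\b(andn|bextr|blsi|blsmsk|blsr|tzcnt|bzhi|mulx|pdep|pext|rorx|sarx|shlx|shrx)\b'
# expected: 0 when no BMI/BMI2 instructions were emitted
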
llama-cpp-grpc: llama.cpp
	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
ifeq ($(OS),Darwin)
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
endif
	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc

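With -DGGML_RPC=ON, the extra --target rpc-server also builds llama.cpp's RPC worker, which lets other hosts offload computation over TCP. An illustrative invocation only, assuming upstream llama.cpp's flag names (double-check against the llama.cpp revision actually vendored here):

./llama-cpp-grpc-build/rpc-server -H 0.0.0.0 -p 50052

A client built from the same tree would then point at the worker with something like --rpc <host>:50052.
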
llama-cpp-rpc-server: llama-cpp-grpc
25 changes: 25 additions & 0 deletions gallery/index.yaml
@@ -1,4 +1,29 @@
---
- name: "rwkv7-g1c-13.3b"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
description: |
The model is **RWKV7 g1c 13B**, a large language model optimized for efficiency. It is quantized using **Bartowski's calibrationv5 for imatrix** to reduce memory usage while maintaining performance. The base model is **BlinkDL/rwkv7-g1**, and this version is tailored for text-generation tasks. It balances accuracy and efficiency, making it suitable for deployment in various applications.
overrides:
parameters:
model: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
name: rwkv7-g1c-13.3b-gguf
backend: llama-cpp
template:
use_tokenizer_template: true
known_usecases:
- chat
function:
grammar:
disable: true
description: Imported from https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
options:
- use_jinja:true
files:
- filename: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
sha256: e06b3b31cee207723be00425cfc25ae09b7fa1abbd7d97eda4e62a7ef254f877
uri: https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf/resolve/main/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
- name: "iquest-coder-v1-40b-instruct-i1"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
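Once this index entry ships, the new model should be installable through LocalAI's gallery API. A sketch, assuming a LocalAI instance listening on localhost:8080 and the default gallery name localai (adjust both to your deployment):

curl http://localhost:8080/models/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "localai@rwkv7-g1c-13.3b"}'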