Merged

Changes from all commits (48 commits)
6988da9
cmake : correct order of sycl flags (#9497)
Xarbirus Sep 15, 2024
e6deac3
gguf-split : add basic checks (#9499)
slaren Sep 15, 2024
6262d13
common : reimplement logging (#9418)
ggerganov Sep 15, 2024
90a2fff
flake.lock: Update (#9488)
ggerganov Sep 16, 2024
c4965a6
metal : handle zero-sized allocs (#9466)
ggerganov Sep 16, 2024
441b72b
main : option to disable context shift (#9484)
VJHack Sep 16, 2024
95ca851
llama : support MiniCPM3 (#9322)
CarryFun Sep 16, 2024
0aadac1
llama : support OLMoE (#9462)
2015aroras Sep 16, 2024
5c3d0f1
ggml : IQ4_NL sgemm + Q4_0 AVX optimization (#9422)
netrunnereve Sep 16, 2024
19514d6
cmake : do not hide GGML options + rename option (#9465)
ggerganov Sep 16, 2024
d54c21d
convert : identify missing model files (#9397)
compilade Sep 16, 2024
a6a3a5c
ggml : link MATH_LIBRARY not by its full path (#9339)
Xarbirus Sep 16, 2024
acb2c32
llama : rename n_embed to n_embd in rwkv6_time_mix (#9504)
danbev Sep 16, 2024
23e0d70
ggml : move common CPU backend impl to new header (#9509)
slaren Sep 16, 2024
37f3a38
llama : add llama_n_head() (#9512)
Xarbirus Sep 17, 2024
0d2ec43
llama : support IBM Granite architecture (#9412)
gabe-l-hart Sep 17, 2024
503147a
unicode : add <algorithm> (#9508)
ykhrustalev Sep 17, 2024
0226613
threadpool : skip polling for unused threads (#9461)
max-krasnyansky Sep 17, 2024
8344ef5
llama : fix n_vocab init for 'no_vocab' case (#9511)
Xarbirus Sep 17, 2024
8b836ae
arg : add env variable for parallel (#9513)
bertwagner Sep 17, 2024
7be099f
llama-bench: correct argument parsing error message (#9524)
Xarbirus Sep 17, 2024
faf67b3
[SYCL]set context default value to avoid memory issue, update guide (…
NeoZhangJianyu Sep 18, 2024
f799155
server : fix OpenSSL build (remove obsolete `LOG_INFO`) (#9529)
EZForever Sep 18, 2024
8a30835
server : match OAI structured output response (#9527)
VJHack Sep 18, 2024
6443ddd
llama : use reserve/emplace_back in sampler_sample (#9534)
danbev Sep 18, 2024
0d2f22e
scripts : verify py deps at the start of compare (#9520)
ggerganov Sep 18, 2024
64c6af3
ggml : fix n_threads_cur initialization with one thread (#9538)
slaren Sep 18, 2024
eca0fab
imatrix : disable prompt escape by default (#9543)
CISC Sep 19, 2024
6026da5
server : clean-up completed tasks from waiting list (#9531)
ggerganov Sep 19, 2024
722ec1e
perplexity : do not escape input data by default (#9548)
CISC Sep 20, 2024
d39e267
examples : flush log upon ctrl+c (#9559)
ggerganov Sep 20, 2024
5cb12f6
CUDA: fix sum.cu compilation for CUDA < 11.7 (#9562)
JohannesGaessler Sep 20, 2024
a6809c6
examples : add null threadpool args where needed (ggml/0)
ggerganov Sep 8, 2024
424c5d0
ggml/examples: add backend support for numerical optimization (ggml/949)
JohannesGaessler Sep 20, 2024
4301535
sync : ggml
ggerganov Sep 20, 2024
27609c4
ggml : fix trailing whitespace (#0)
ggerganov Sep 20, 2024
d13edb1
ggml : fix builds (#0)
ggerganov Sep 20, 2024
6335114
quantize : improve type name parsing (#9570)
slaren Sep 20, 2024
e948a7d
CI: Provide prebuilt windows binary for hip (#9467)
no1wudi Sep 21, 2024
41f4778
Update CUDA graph on scale change plus clear nodes/params (#9550)
agray3 Sep 21, 2024
d09770c
ggml-alloc : fix list of allocated tensors with GGML_ALLOCATOR_DEBUG …
slaren Sep 21, 2024
2a63caa
RWKV v6: RWKV_WKV op CUDA implementation (#9454)
MollySophia Sep 22, 2024
ecd5d6b
llama: remove redundant loop when constructing ubatch (#9574)
shankarg87 Sep 22, 2024
a5b57b0
CUDA: enable Gemma FA for HIP/Pascal (#9581)
JohannesGaessler Sep 22, 2024
912c331
Fix merge error in #9454 (#9589)
MollySophia Sep 22, 2024
c35e586
musa: enable building fat binaries, enable unified memory, and disabl…
yeahdongcn Sep 22, 2024
e62e978
Revert "[SYCL] fallback mmvq (#9088)" (#9579)
Sep 23, 2024
ff3c559
Merge branch 'layla-build' into merge
l3utterfly Sep 23, 2024
73 changes: 71 additions & 2 deletions .github/workflows/build.yml
@@ -23,6 +23,9 @@ env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1

jobs:
macOS-latest-cmake-arm64:
@@ -964,6 +967,7 @@ jobs:
name: llama-bin-win-sycl-x64.zip

windows-latest-cmake-hip:
if: ${{ github.event.inputs.create_release != 'true' }}
runs-on: windows-latest

steps:
@@ -991,8 +995,72 @@ jobs:
run: |
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
cmake --build build --config Release
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

windows-latest-cmake-hip-release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
runs-on: windows-latest

strategy:
matrix:
gpu_target: [gfx1100, gfx1101, gfx1030]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: Install
id: depends
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading AMD HIP SDK Installer"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP SDK"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
write-host "Completed AMD HIP SDK installation"

- name: Verify ROCm
id: verify
run: |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

- name: Build
id: cmake_build
run: |
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi

- name: Pack artifacts
id: pack_artifacts
run: |
7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*

- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip

ios-xcode-build:
runs-on: macos-latest
@@ -1057,6 +1125,7 @@ jobs:
- macOS-latest-cmake
- windows-latest-cmake
- windows-latest-cmake-cuda
- windows-latest-cmake-hip-release
- macOS-latest-cmake-arm64
- macOS-latest-cmake-x64

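For reference, a rough local equivalent of a single matrix entry from the new windows-latest-cmake-hip-release job, written as a Linux shell sketch: the -D flags mirror the CI step above, while the ROCm clang path, the gfx1100 target, and the -j count are illustrative assumptions, not taken from this PR.

# sketch: build one HIP variant locally with the same flags as the CI job
CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
cmake -B build -S . -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release \
      -DGPU_TARGETS=gfx1100 -DGGML_RPC=ON
cmake --build build -j"$(nproc)"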
6 changes: 6 additions & 0 deletions .github/workflows/server.yml
@@ -20,6 +20,12 @@ on:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

env:
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
LLAMA_LOG_VERBOSITY: 10

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
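The LLAMA_LOG_* variables exported here come from the logging rework in #9418. A minimal shell sketch of reproducing the same formatting for a local server run; the model path is illustrative, and reading VERBOSITY=10 as "very verbose" is an assumption based on its use in this CI file.

# sketch: mirror the CI log settings locally
export LLAMA_LOG_COLORS=1      # colorized output
export LLAMA_LOG_PREFIX=1      # per-line level prefix
export LLAMA_LOG_TIMESTAMPS=1  # per-line timestamps
export LLAMA_LOG_VERBOSITY=10  # assumed: higher value means more verbose
./llama-server -m model.gguf   # illustrative model path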
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -82,11 +82,11 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})

# change the default for these ggml options
if (NOT DEFINED GGML_LLAMAFILE)
set(GGML_LLAMAFILE ON)
set(GGML_LLAMAFILE_DEFAULT ON)
endif()

if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
set(GGML_CUDA_USE_GRAPHS ON)
if (NOT DEFINED GGML_CUDA_GRAPHS)
set(GGML_CUDA_GRAPHS_DEFAULT ON)
endif()

# transition helpers
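This hunk pairs with "cmake : do not hide GGML options + rename option (#9465)": instead of force-setting the GGML options, llama.cpp now only sets their *_DEFAULT counterparts, so an explicit -D value from the user should win. A configure sketch under that assumption (flag values are illustrative):

# sketch: the renamed option can now be overridden explicitly at configure time
cmake -B build -S . -DGGML_CUDA=ON -DGGML_CUDA_GRAPHS=OFF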
42 changes: 31 additions & 11 deletions Makefile
@@ -54,6 +54,7 @@ TEST_TARGETS = \
tests/test-grammar-parser \
tests/test-json-schema-to-grammar \
tests/test-llama-grammar \
tests/test-log \
tests/test-model-load-cancel \
tests/test-opt \
tests/test-quantize-fns \
@@ -148,6 +149,14 @@ GGML_NO_METAL := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_DISABLE_LOGS
REMOVE_WARNING := 1
endif

ifdef LLAMA_SERVER_VERBOSE
REMOVE_WARNING := 1
endif

ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
@@ -351,19 +360,11 @@ ifdef LLAMA_SANITIZE_UNDEFINED
MK_LDFLAGS += -fsanitize=undefined -g
endif

ifdef LLAMA_SERVER_VERBOSE
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
endif

ifdef LLAMA_SERVER_SSL
MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
MK_LDFLAGS += -lssl -lcrypto
endif

ifdef LLAMA_DISABLE_LOGS
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
endif # LLAMA_DISABLE_LOGS

# warnings
WARN_FLAGS = \
-Wall \
@@ -610,15 +611,15 @@ ifdef GGML_CUDA

MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_22
MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
else
ifneq ('', '$(wildcard /opt/cuda)')
CUDA_PATH ?= /opt/cuda
else
CUDA_PATH ?= /usr/local/cuda
endif

MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math
endif # GGML_MUSA
@@ -931,6 +932,7 @@ OBJ_LLAMA = \
OBJ_COMMON = \
common/common.o \
common/arg.o \
common/log.o \
common/console.o \
common/ngram-cache.o \
common/sampling.o \
@@ -1027,6 +1029,14 @@ $(info - LLAMA_NO_CCACHE)
$(info )
endif

ifdef REMOVE_WARNING
$(info !!! REMOVAL WARNING !!!)
$(info The following LLAMA_ options have been removed and are no longer supported)
$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418))
$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418))
$(info )
endif

#
# Build libraries
#
@@ -1168,6 +1178,11 @@ common/arg.o: \
common/arg.h
$(CXX) $(CXXFLAGS) -c $< -o $@

common/log.o: \
common/log.cpp \
common/log.h
$(CXX) $(CXXFLAGS) -c $< -o $@

common/sampling.o: \
common/sampling.cpp \
common/sampling.h \
@@ -1346,7 +1361,7 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
$(OBJ_GGML) $(OBJ_LLAMA)
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -1528,6 +1543,11 @@ tests/test-llama-grammar: tests/test-llama-grammar.cpp \
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

tests/test-log: tests/test-log.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

tests/test-grammar-parser: tests/test-grammar-parser.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
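Since LLAMA_DISABLE_LOGS and LLAMA_SERVER_VERBOSE were removed in #9418, the Makefile now surfaces a removal warning instead of silently ignoring them, and a tests/test-log target covers the new logger. A quick shell sketch of both; the warning text is taken from the $(info ...) block above, the pipe to head is illustrative.

# sketch: a removed flag now triggers "!!! REMOVAL WARNING !!!" at the top of the build
make LLAMA_DISABLE_LOGS=1 2>&1 | head
# build the new logging test added in this change
make tests/test-log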
1 change: 1 addition & 0 deletions README.md
@@ -77,6 +77,7 @@ Typically finetunes of the base models below are supported as well.
- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
- [x] [OLMo](https://allenai.org/olmo)
- [x] [OLMoE](https://huggingface.co/allenai/OLMoE-1B-7B-0924)
- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
- [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)
3 changes: 3 additions & 0 deletions ci/run.sh
@@ -737,6 +737,9 @@ function gg_sum_embd_bge_small {

## main

export LLAMA_LOG_PREFIX=1
export LLAMA_LOG_TIMESTAMPS=1

if [ -z ${GG_BUILD_LOW_PERF} ]; then
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
rm -rf ${SRC}/models-mnt
22 changes: 12 additions & 10 deletions common/CMakeLists.txt
@@ -51,21 +51,23 @@ endif()
set(TARGET common)

add_library(${TARGET} STATIC
arg.cpp
arg.h
base64.hpp
common.h
common.cpp
arg.h
arg.cpp
sampling.h
sampling.cpp
console.h
common.h
console.cpp
json.hpp
console.h
json-schema-to-grammar.cpp
train.h
train.cpp
ngram-cache.h
json.hpp
log.cpp
log.h
ngram-cache.cpp
ngram-cache.h
sampling.cpp
sampling.h
train.cpp
train.h
)

if (BUILD_SHARED_LIBS)
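With log.cpp and log.h added to the (now alphabetized) source list, a reconfigure picks them up as part of the static common library. A minimal sketch; the target name common is taken from set(TARGET common) above.

# sketch: rebuild just the common static library after the source-list change
cmake -B build -S .
cmake --build build --target common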