feat(sycl): Add support for Intel GPUs with sycl (#1647) (#1660)
* feat(sycl): Add sycl support (#1647)

* onekit: install without prompts

* set cmake args only in grpc-server

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* cleanup

* fixup sycl source env

* Cleanup docs

* ci: runs on self-hosted

* fix typo

* bump llama.cpp

* llama.cpp: update server

* adapt to upstream changes

* adapt to upstream changes

* docs: add sycl

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
mudler committed Feb 1, 2024
1 parent 16cebf0 commit 1c57f8d
Showing 13 changed files with 917 additions and 755 deletions.
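The new sycl_f16 and sycl_f32 build types are selected the same way as the existing ones. A rough sketch only (assuming the repository's usual BUILD_TYPE-driven Dockerfile and make targets; exact names may differ):

# Container image with the SYCL FP16 backend (IMAGE_TYPE is "extras" or "core", as in the Dockerfile)
docker build --build-arg BUILD_TYPE=sycl_f16 --build-arg IMAGE_TYPE=core -t local-ai:sycl-f16-core .

# Native build, with the oneAPI environment sourced so the icx/icpx compilers are available
source /opt/intel/oneapi/setvars.sh
BUILD_TYPE=sycl_f16 make build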
7 changes: 7 additions & 0 deletions .github/workflows/image-pr.yml
@@ -75,6 +75,13 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
28 changes: 28 additions & 0 deletions .github/workflows/image.yml
@@ -122,6 +122,34 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: 'sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
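These matrix entries produce core images with the sycl-f16/sycl-f32 tag suffixes above, built on the self-hosted arc-runner-set runners. A purely illustrative run of such an image on an Intel GPU host (registry and release tag are placeholders; passing /dev/dri through is the usual way to expose the GPU to the container):

docker run --rm -p 8080:8080 --device /dev/dri <registry>/local-ai:<release>-sycl-f16-ffmpeg-core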
9 changes: 8 additions & 1 deletion Dockerfile
@@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras
# extras or core


FROM golang:$GO_VERSION as requirements-core

ARG BUILD_TYPE
@@ -38,6 +37,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi

# oneapi requirements
RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
; fi

ENV PATH /usr/local/cuda/bin:${PATH}

# OpenBLAS requirements and stable diffusion
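The silent installer flags (-a -s --eula accept) place the Base Kit under /opt/intel/oneapi, the same prefix sourced by the backend Makefile below. An optional sanity check inside a sycl image, not part of this change, might look like:

source /opt/intel/oneapi/setvars.sh
sycl-ls          # list the SYCL backends/devices visible to the runtime
icpx --version   # confirm the oneAPI DPC++/C++ compiler is on the PATH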
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0

GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7

CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de
CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
2 changes: 1 addition & 1 deletion backend/cpp/llama/CMakeLists.txt
@@ -70,7 +70,7 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp json.hpp )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
15 changes: 15 additions & 0 deletions backend/cpp/llama/Makefile
@@ -3,6 +3,7 @@ LLAMA_VERSION?=

CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
@@ -19,6 +20,14 @@ else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif

llama.cpp:
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
@@ -31,6 +40,7 @@ llama.cpp/examples/grpc-server:
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
@@ -49,5 +59,10 @@ clean:
rm -rf grpc-server

grpc-server: llama.cpp llama.cpp/examples/grpc-server
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
else
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
endif
cp llama.cpp/build/bin/grpc-server .
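For reference, with BUILD_TYPE=sycl_f16 the recipe above expands to roughly the following commands (the oneAPI environment must be sourced so that the icx/icpx compilers are found):

source /opt/intel/oneapi/setvars.sh
cd llama.cpp && mkdir -p build && cd build
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
cmake --build . --config Release
cp bin/grpc-server ../..   # copy the resulting backend binary back out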
