Build llama.cpp with SYCL support
Add gpu_info_oneapi

Add oneapi to gpu.go

Fix oneAPI linking by using icx compiler and updating rpath

Add oneAPI integrated GPU detection

Update rocky linux gcc version (rocky linux 9 only has gcc 12+)

Add oneapi docker build

fix ollama compile

fix lint & compile

typo

update build_docker.sh

update doc

update windows build script

keep libpath

update

update llama.cpp version for windows

use gnu compiler to build static-lib in oneapi env

add fall back log

fix docker build script

oneAPI image (Ubuntu 22.04) works well with an ARC770 GPU
felipeagc authored and zhewang1-intc committed Apr 11, 2024
1 parent 9446b79 commit ac30dc4
Showing 11 changed files with 594 additions and 19 deletions.
26 changes: 25 additions & 1 deletion Dockerfile
@@ -43,13 +43,22 @@ ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
RUN mkdir /tmp/scratch && \
for dep in $(cat /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
cp ${dep} /tmp/scratch/ || exit 1 ; \
done && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )


FROM --platform=linux/amd64 intel/oneapi-basekit:2024.0.1-devel-rockylinux9 AS oneapi-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
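
The SYCL compilation itself happens inside `gen_linux.sh`, which is not shown in this diff. Judging from the upstream llama.cpp SYCL instructions of this period and the commit note about linking with the `icx` compiler, the underlying CMake invocation is likely along these lines (a sketch under those assumptions, not the verbatim script):

```
source /opt/intel/oneapi/setvars.sh
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --config Release
```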

FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
@@ -98,6 +107,7 @@ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linu
COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
COPY --from=oneapi-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build -trimpath .
@@ -132,6 +142,20 @@ ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

# oneAPI images are much larger, so we keep this image distinct from the CPU/CUDA image.
# Use the Ubuntu oneapi-basekit image as the runtime image; the GPU must be mounted into the container with --device.
# e.g. docker run -it -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 IMAGE_ID
# The host machine must have the Intel GPU driver installed correctly.
# To list the devices you may want to mount, run: sudo intel_gpu_top -L
FROM --platform=linux/amd64 intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 AS runtime-oneapi
RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
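For reference, one minimal way to build and run just the oneAPI runtime stage defined above (the tag, port mapping, and device paths are illustrative, not part of this commit):

```
docker build --platform linux/amd64 --target runtime-oneapi -t ollama-oneapi .
docker run --device /dev/dri/renderD128 --device /dev/dri/card1 -p 11434:11434 ollama-oneapi
```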
21 changes: 21 additions & 0 deletions docs/development.md
@@ -90,6 +90,27 @@ go build .

ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.

#### Linux oneAPI (Intel)

_Your operating system distribution may already have packages for Intel oneAPI and the Intel GPU driver. Distro packages are often preferable, but installation instructions are distro-specific; consult your distribution's documentation for dependencies if available._

Install [oneAPI](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) development packages and [Intel GPU driver](https://dgpu-docs.intel.com/driver/installation.html) first, as well as `cmake` and `golang`.

Typically the build scripts will auto-detect oneAPI. However, if your Linux distro or installation approach uses unusual paths, you can set the `ONEAPI_ROOT` environment variable to the location of the shared libraries and the `icpx`/`icx` compilers. Then generate the dependencies:

```
go generate ./...
```

Then build the binary:

```
go build .
```
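
For example, with the toolkit in its default location (`/opt/intel/oneapi` is the installer default and an assumption here, not something these scripts require):

```
source /opt/intel/oneapi/setvars.sh
ONEAPI_ROOT=/opt/intel/oneapi go generate ./...
go build .
```

Note that `setvars.sh` already exports `ONEAPI_ROOT`; setting it explicitly only matters for unusual installation paths.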

#### Advanced CPU Settings

By default, running `go generate ./...` will compile a few different variations
74 changes: 71 additions & 3 deletions gpu/gpu.go
@@ -27,6 +27,7 @@ import (
type handles struct {
nvml *C.nvml_handle_t
cudart *C.cudart_handle_t
oneapi *C.oneapi_handle_t
}

const (
@@ -83,16 +84,27 @@ var CudartWindowsGlobs = []string{
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

var OneapiWindowsGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

var OneapiLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
"/usr/lib*/libze_intel_gpu.so*",
}

// Note: gpuMutex must already be held
func initGPUHandles() *handles {

// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

gpuHandles := &handles{nil, nil}
gpuHandles := &handles{nil, nil, nil}
var nvmlMgmtName string
var nvmlMgmtPatterns []string
var cudartMgmtName string
var cudartMgmtPatterns []string
var oneapiMgmtName string
var oneapiMgmtPatterns []string

tmpDir, _ := PayloadsDir()
switch runtime.GOOS {
@@ -104,6 +116,9 @@ func initGPUHandles() {
localAppData := os.Getenv("LOCALAPPDATA")
cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
oneapiMgmtName = "ze_intel_gpu64.dll"
oneapiMgmtPatterns = make([]string, len(OneapiWindowsGlobs))
copy(oneapiMgmtPatterns, OneapiWindowsGlobs)
case "linux":
nvmlMgmtName = "libnvidia-ml.so"
nvmlMgmtPatterns = make([]string, len(NvmlLinuxGlobs))
@@ -114,6 +129,9 @@
cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
oneapiMgmtName = "libze_intel_gpu.so"
oneapiMgmtPatterns = make([]string, len(OneapiLinuxGlobs))
copy(oneapiMgmtPatterns, OneapiLinuxGlobs)
default:
return gpuHandles
}
@@ -139,6 +157,17 @@
return gpuHandles
}
}

oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
if len(oneapiLibPaths) > 0 {
oneapi := LoadOneapiMgmt(oneapiLibPaths)
if oneapi != nil {
slog.Info("Intel GPU detected")
gpuHandles.oneapi = oneapi
return gpuHandles
}
}

return gpuHandles
}

@@ -206,6 +235,28 @@ func GetGPUInfo() GpuInfo {
slog.Info(fmt.Sprintf("[cudart] CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
}
}
} else if gpuHandles.oneapi != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
if memInfo.err != nil {
slog.Info(fmt.Sprintf("error looking up oneAPI GPU memory: %s", C.GoString(memInfo.err)))
C.free(unsafe.Pointer(memInfo.err))
} else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
// Only one GPU detected and it appears to be an integrated GPU - skip it
slog.Info("OneAPI unsupported integrated GPU detected")
} else if memInfo.count > 0 {
if memInfo.igpu_index >= 0 {
// We have multiple GPUs reported, and one of them is an integrated GPU
// so we have to set the env var to bypass it
// If the user has specified their own SYCL_DEVICE_ALLOWLIST, don't clobber it
val := os.Getenv("SYCL_DEVICE_ALLOWLIST")
if val == "" {
val = "DeviceType:gpu"
os.Setenv("SYCL_DEVICE_ALLOWLIST", val)
}
slog.Info(fmt.Sprintf("oneAPI integrated GPU detected - SYCL_DEVICE_ALLOWLIST=%s", val))
}
resp.Library = "oneapi"
}
} else {
AMDGetGPUInfo(&resp)
if resp.Library != "" {
@@ -254,8 +305,8 @@ func CheckVRAM() (uint64, error) {
return uint64(avail), nil
}
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
return gpuInfo.FreeMemory, nil
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm" || gpuInfo.Library == "oneapi") {
return gpuInfo.FreeMemory, nil
}

return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determination
@@ -349,6 +400,23 @@ func LoadCUDARTMgmt(cudartLibPaths []string) *C.cudart_handle_t {
return nil
}

func LoadOneapiMgmt(oneapiLibPaths []string) *C.oneapi_handle_t {
var resp C.oneapi_init_resp_t
resp.oh.verbose = getVerboseState()
for _, libPath := range oneapiLibPaths {
lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib))
C.oneapi_init(lib, &resp)
if resp.err != nil {
slog.Info(fmt.Sprintf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err)))
C.free(unsafe.Pointer(resp.err))
} else {
return &resp.oh
}
}
return nil
}

func getVerboseState() C.uint16_t {
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
return C.uint16_t(1)
3 changes: 2 additions & 1 deletion gpu/gpu_info.h
@@ -54,6 +54,7 @@ void cpu_check_ram(mem_info_t *resp);

#include "gpu_info_nvml.h"
#include "gpu_info_cudart.h"
#include "gpu_info_oneapi.h"

#endif // __GPU_INFO_H__
#endif // __APPLE__
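
The implementation behind the new `gpu_info_oneapi.h` include is among the changed files not rendered on this page. From the cgo calls in `gpu.go` above, the header's interface must look roughly like the following sketch (member order and any extra fields are assumptions, not the verbatim header):

```c
// Sketch inferred from the calls in gpu.go; not the committed header.
typedef struct oneapi_handle {
  void *handle;      // assumed: dlopen'd libze_intel_gpu library
  uint16_t verbose;  // set from getVerboseState() in gpu.go
} oneapi_handle_t;

typedef struct oneapi_init_resp {
  char *err;         // NULL on success; freed by the caller (LoadOneapiMgmt)
  oneapi_handle_t oh;
} oneapi_init_resp_t;

void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp);
```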
