
Commit 7127029

Merge branch 'master' into feat-hf-api-scan
mudler authored Jul 10, 2024
2 parents 700c631 + 8d046de commit 7127029
Showing 11 changed files with 61 additions and 37 deletions.

.github/workflows/image-pr.yml: 2 additions & 2 deletions

@@ -46,7 +46,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

.github/workflows/image.yml: 8 additions & 8 deletions

@@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

.github/workflows/image_build.yml: 1 addition & 1 deletion

@@ -23,7 +23,7 @@ on:
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "5"
default: "4"
type: string
platforms:
description: 'Platforms'

.github/workflows/release.yaml: 1 addition & 10 deletions

@@ -40,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-5
CUDA_VERSION: 12-4
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -78,15 +78,6 @@ jobs:
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc
# http://google.github.io/googletest/quickstart-cmake.html
# Seems otherwise cross-arch fails to find it
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \

Dockerfile: 1 addition & 1 deletion

@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ARG CUDA_MINOR_VERSION=4

ENV BUILD_TYPE=${BUILD_TYPE}


Makefile: 2 additions & 1 deletion

@@ -425,7 +425,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .

run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -701,6 +701,7 @@ backend/cpp/llama/llama.cpp:
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-DABSL_BUILD_TESTING=OFF \
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \

core/http/endpoints/openai/files.go: 19 additions & 8 deletions

@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("unable to find file id %s", id)
}

// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router /v1/files/{file_id} [get]
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -135,13 +138,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
}
}

// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
type DeleteStatus struct {
Id string
Object string
Deleted bool
}

// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteStatus struct {
Id string
Object string
Deleted bool
}

return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -174,7 +181,11 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
}

// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
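
The files.go hunks above add swag-style annotations (@Summary, @Success, @Router) to the file endpoints and move DeleteStatus from inside DeleteFilesEndpoint to package level so the annotation can reference it. Below is a minimal, self-contained sketch of that pattern with Fiber; deleteFileHandler and its behaviour are hypothetical stand-ins for illustration, not code from this commit.

```go
package main

import "github.com/gofiber/fiber/v2"

// DeleteStatus mirrors the package-level type introduced by the commit, so an
// annotation like "@Success 200 {object} DeleteStatus" has a type to point at.
type DeleteStatus struct {
	Id      string
	Object  string
	Deleted bool
}

// deleteFileHandler is a hypothetical stand-in for DeleteFilesEndpoint.
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
func deleteFileHandler(c *fiber.Ctx) error {
	id := c.Params("file_id")
	// ...look up and remove the uploaded file identified by id here...
	return c.JSON(DeleteStatus{Id: id, Object: "file", Deleted: true})
}

func main() {
	app := fiber.New()
	app.Delete("/v1/files/:file_id", deleteFileHandler)
	_ = app.Listen(":8080")
}
```

With swaggo/swag, running `swag init` over comments of this form generates the corresponding OpenAPI route entries.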

core/http/endpoints/openai/list.go: 5 additions & 4 deletions

@@ -6,6 +6,10 @@ import (
"github.com/mudler/LocalAI/core/services"
)

// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
if err != nil {
return err
}
return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`
}{
return c.JSON(schema.ModelsDataResponse{
Object: "list",
Data: dataModels,
})

core/p2p/p2p.go: 6 additions & 2 deletions

@@ -348,11 +348,15 @@ func newNodeOpts(token string) ([]node.Option, error) {
llger := logger.New(log.LevelFatal)
defaultInterval := 10 * time.Second

// TODO: move this up, expose more config options when creating a node
noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"

loglevel := "info"

c := config.Config{
Limit: config.ResourceLimit{
Enable: true,
Enable: !noLimits,
MaxConns: 100,
},
NetworkToken: token,
@@ -372,7 +376,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
RateLimitInterval: defaultInterval,
},
Discovery: config.Discovery{
DHT: true,
DHT: noDHT,
MDNS: true,
Interval: 30 * time.Second,
},
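
The p2p.go hunks read two new environment variables and feed them into the node configuration: LOCALAI_P2P_DISABLE_LIMITS is negated into Limit.Enable, while the Discovery hunk assigns noDHT to DHT directly. The sketch below assumes the documented semantics, i.e. setting either variable to "true" turns the corresponding feature off; the toggle struct is a hypothetical stand-in, not the config type used here.

```go
package main

import (
	"fmt"
	"os"
)

// nodeToggles is a hypothetical stand-in for the parts of the p2p node
// configuration toggled by the new environment variables.
type nodeToggles struct {
	LimitsEnabled bool // connection limits / resource management
	DHTEnabled    bool // DHT discovery (mDNS stays enabled regardless)
}

// togglesFromEnv maps the LOCALAI_P2P_* variables onto the toggles, treating
// the string "true" as "disable this feature".
func togglesFromEnv() nodeToggles {
	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
	noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
	return nodeToggles{
		LimitsEnabled: !noLimits,
		DHTEnabled:    !noDHT,
	}
}

func main() {
	t := togglesFromEnv()
	fmt.Printf("limits enabled: %v, DHT enabled: %v\n", t.LimitsEnabled, t.DHTEnabled)
}
```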

core/schema/openai.go: 5 additions & 0 deletions

@@ -155,3 +155,8 @@ type OpenAIRequest struct {
// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}

type ModelsDataResponse struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}
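
ModelsDataResponse gives /v1/models a named response type that both the handler in list.go and its @Success annotation can reference. Below is a quick sketch of the JSON it encodes to; the openAIModel type is a hypothetical stand-in for schema.OpenAIModel, assuming the usual id/object fields.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// openAIModel is a hypothetical stand-in for schema.OpenAIModel with the
// customary OpenAI fields.
type openAIModel struct {
	ID     string `json:"id"`
	Object string `json:"object"`
}

// modelsDataResponse mirrors schema.ModelsDataResponse as added above.
type modelsDataResponse struct {
	Object string        `json:"object"`
	Data   []openAIModel `json:"data"`
}

func main() {
	resp := modelsDataResponse{
		Object: "list",
		Data:   []openAIModel{{ID: "example-model", Object: "model"}},
	}
	out, _ := json.Marshal(resp)
	// Prints: {"object":"list","data":[{"id":"example-model","object":"model"}]}
	fmt.Println(string(out))
}
```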

docs/content/docs/features/distributed_inferencing.md: 11 additions & 0 deletions

@@ -98,3 +98,14 @@ The server logs should indicate that new workers are being discovered.
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.


## Environment Variables

There are options that can be tweaked or parameters that can be set using environment variables

| Environment Variable | Description |
|----------------------|-------------|
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
