Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: backends improvements #778

Merged
merged 6 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:

sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo pip install -r extra/requirements.txt

sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
Expand All @@ -45,7 +46,6 @@ jobs:
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/

- name: Test
run: |
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ go-llama
/gpt4all
go-stable-diffusion
go-piper
/go-bert
go-ggllm
/piper

__pycache__/
*.a
get-sources

Expand Down
7 changes: 6 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py"
ARG GO_TAGS="stablediffusion tts"

RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch
apt-get install -y ca-certificates cmake curl patch pip

# Extras requirements
COPY extra/requirements.txt /build/extra/requirements.txt
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt

# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
Expand Down
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ test: prepare test-models/testmodel grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion"
$(MAKE) prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
$(MAKE) test-gpt4all
$(MAKE) test-llama
Expand All @@ -338,9 +338,7 @@ test-stablediffusion: prepare-test

test-container:
docker build --target requirements -t local-ai-test-container .
docker run --name localai-tests -e GO_TAGS=$(GO_TAGS) -ti -v $(abspath ./):/build local-ai-test-container make test
docker rm localai-tests
docker rmi local-ai-test-container
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container

## Help:
help: ## Show this help.
Expand All @@ -354,10 +352,15 @@ help: ## Show this help.
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
}' $(MAKEFILE_LIST)

protogen:
protogen: protogen-go protogen-python

protogen-go:
protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
pkg/grpc/proto/backend.proto

protogen-python:
python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto

## GRPC

backend-assets/grpc:
Expand Down
79 changes: 63 additions & 16 deletions api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string

commonOpts := []options.AppOption{
options.WithDebug(true),
options.WithDisableMessage(true),
}

Context("API with ephemeral models", func() {
BeforeEach(func() {
var err error
Expand All @@ -143,7 +148,7 @@ var _ = Describe("API test", func() {
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{gallery.File{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
},
}
out, err := yaml.Marshal(g)
Expand All @@ -159,9 +164,10 @@ var _ = Describe("API test", func() {
}

app, err = App(
options.WithContext(c),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))
append(commonOpts,
options.WithContext(c),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

Expand Down Expand Up @@ -400,13 +406,14 @@ var _ = Describe("API test", func() {
}

app, err = App(
options.WithContext(c),
options.WithAudioDir(tmpdir),
options.WithImageDir(tmpdir),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader),
options.WithBackendAssets(backendAssets),
options.WithBackendAssetsOutput(tmpdir),
append(commonOpts,
options.WithContext(c),
options.WithAudioDir(tmpdir),
options.WithImageDir(tmpdir),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader),
options.WithBackendAssets(backendAssets),
options.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
Expand Down Expand Up @@ -500,7 +507,12 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())

var err error
app, err = App(options.WithContext(c), options.WithModelLoader(modelLoader))
app, err = App(
append(commonOpts,
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
options.WithContext(c),
options.WithModelLoader(modelLoader),
)...)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

Expand All @@ -524,7 +536,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(10))
Expect(len(models.Models)).To(Equal(11))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expand Down Expand Up @@ -555,7 +567,7 @@ var _ = Describe("API test", func() {
})

It("returns errors", func() {
backends := len(model.AutoLoadBackends)
backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
Expand Down Expand Up @@ -602,6 +614,36 @@ var _ = Describe("API test", func() {
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
})

Context("External gRPC calls", func() {
It("calculate embeddings with huggingface", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaCodeSearchCode,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))

sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaCodeSearchCode,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
})
})

Context("backends", func() {
It("runs rwkv completion", func() {
if runtime.GOOS != "linux" {
Expand Down Expand Up @@ -674,7 +716,12 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())

var err error
app, err = App(options.WithContext(c), options.WithModelLoader(modelLoader), options.WithConfigFile(os.Getenv("CONFIG_FILE")))
app, err = App(
append(commonOpts,
options.WithContext(c),
options.WithModelLoader(modelLoader),
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

Expand All @@ -696,7 +743,7 @@ var _ = Describe("API test", func() {
It("can generate chat completions from config file", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(12))
Expect(len(models.Models)).To(Equal(13))
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expand Down
4 changes: 4 additions & 0 deletions api/backend/embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
model.WithContext(o.Context),
}

for k, v := range o.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
}

if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
Expand Down
10 changes: 9 additions & 1 deletion api/backend/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,20 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
return nil, fmt.Errorf("endpoint only working with stablediffusion models")
}

inferenceModel, err := loader.BackendLoader(
opts := []model.Option{
model.WithBackendString(c.Backend),
model.WithAssetDir(o.AssetsDestination),
model.WithThreads(uint32(c.Threads)),
model.WithContext(o.Context),
model.WithModelFile(c.ImageGenerationAssets),
}

for k, v := range o.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
}

inferenceModel, err := loader.BackendLoader(
opts...,
)
if err != nil {
return nil, err
Expand Down
28 changes: 27 additions & 1 deletion api/backend/llm.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package backend

import (
"os"
"regexp"
"strings"
"sync"

config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)

func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
Expand All @@ -27,12 +30,32 @@ func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *opt
model.WithContext(o.Context),
}

for k, v := range o.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
}

if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
}

// Check if the modelFile exists, if it doesn't try to load it from the gallery
if o.AutoloadGalleries { // experimental
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
return nil, err
}
}
}

if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(c.Backend))
inferenceModel, err = loader.BackendLoader(opts...)
}

if err != nil {
return nil, err
}
Expand All @@ -50,6 +73,9 @@ func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *opt
return ss, err
} else {
reply, err := inferenceModel.Predict(o.Context, opts)
if err != nil {
return "", err
}
return reply.Message, err
}
}
Expand Down
42 changes: 42 additions & 0 deletions api/backend/transcript.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package backend

import (
"context"
"fmt"

config "github.com/go-skynet/LocalAI/api/config"

"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
model "github.com/go-skynet/LocalAI/pkg/model"
)

// ModelTranscription runs an audio transcription request against the whisper
// backend.
//
// audio and language are sent to the backend as the Dst and Language fields of
// the transcript request (audio is presumably a path to the audio file —
// confirm against the caller). The model file and thread count are read from
// c; the context, asset directory and external gRPC backends used to load the
// backend are read from o.
//
// The backend is loaded through loader, consistently with the other functions
// in this package. When loader is nil, o.Loader is used instead.
//
// It returns the result of the backend's AudioTranscription call, or an error
// if the backend could not be loaded.
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
	// Honor the loader supplied by the caller; previously this parameter was
	// silently ignored in favor of o.Loader. Keep o.Loader as the fallback so
	// callers that pass nil continue to work.
	if loader == nil {
		loader = o.Loader
	}

	opts := []model.Option{
		model.WithBackendString(model.WhisperBackend),
		model.WithModelFile(c.Model),
		model.WithContext(o.Context),
		model.WithThreads(uint32(c.Threads)),
		model.WithAssetDir(o.AssetsDestination),
	}

	// Forward every configured external gRPC backend to the loader.
	for k, v := range o.ExternalGRPCBackends {
		opts = append(opts, model.WithExternalBackend(k, v))
	}

	whisperModel, err := loader.BackendLoader(opts...)
	if err != nil {
		return nil, err
	}

	// Guard against a loader that reports success without returning a client.
	if whisperModel == nil {
		return nil, fmt.Errorf("could not load whisper model")
	}

	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
		Dst:      audio,
		Language: language,
		Threads:  uint32(c.Threads),
	})
}