mudler · mudler · Jul 21, 2023 · Jul 19, 2023 · Jul 19, 2023 · Jul 20, 2023
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -29,6 +29,7 @@ jobs:
 
           sudo apt-get install -y ca-certificates cmake curl patch
           sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+          sudo pip install -r extra/requirements.txt
 
           sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
           curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
@@ -45,7 +46,6 @@ jobs:
           sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
           sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
           sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
-
       - name: Test
         run: |
           ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test

diff --git a/.gitignore b/.gitignore
@@ -3,9 +3,10 @@ go-llama
 /gpt4all
 go-stable-diffusion
 go-piper
+/go-bert
 go-ggllm
 /piper
-
+__pycache__/
 *.a
 get-sources
 

diff --git a/Dockerfile b/Dockerfile
@@ -11,10 +11,15 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
+ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py"
 ARG GO_TAGS="stablediffusion tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates cmake curl patch
+    apt-get install -y ca-certificates cmake curl patch pip
+
+# Extras requirements
+COPY extra/requirements.txt /build/extra/requirements.txt
+RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
 
 # CuBLAS requirements
 RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \

diff --git a/Makefile b/Makefile
@@ -313,7 +313,7 @@ test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
@@ -338,9 +338,7 @@ test-stablediffusion: prepare-test
 
 test-container:
 	docker build --target requirements -t local-ai-test-container .
-	docker run --name localai-tests -e GO_TAGS=$(GO_TAGS) -ti -v $(abspath ./):/build local-ai-test-container make test
-	docker rm localai-tests
-	docker rmi local-ai-test-container
+	docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
 
 ## Help:
 help: ## Show this help.
@@ -354,10 +352,15 @@ help: ## Show this help.
 		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
 		}' $(MAKEFILE_LIST)
 
-protogen:
+protogen: protogen-go protogen-python
+
+protogen-go:
 	protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
     pkg/grpc/proto/backend.proto
 
+protogen-python:
+	python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
+
 ## GRPC
 
 backend-assets/grpc:

diff --git a/api/api_test.go b/api/api_test.go
@@ -125,6 +125,11 @@ var _ = Describe("API test", func() {
 	var cancel context.CancelFunc
 	var tmpdir string
 
+	commonOpts := []options.AppOption{
+		options.WithDebug(true),
+		options.WithDisableMessage(true),
+	}
+
 	Context("API with ephemeral models", func() {
 		BeforeEach(func() {
 			var err error
@@ -143,7 +148,7 @@ var _ = Describe("API test", func() {
 					Name:            "bert2",
 					URL:             "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 					Overrides:       map[string]interface{}{"foo": "bar"},
-					AdditionalFiles: []gallery.File{gallery.File{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
+					AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
 				},
 			}
 			out, err := yaml.Marshal(g)
@@ -159,9 +164,10 @@ var _ = Describe("API test", func() {
 			}
 
 			app, err = App(
-				options.WithContext(c),
-				options.WithGalleries(galleries),
-				options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))
+				append(commonOpts,
+					options.WithContext(c),
+					options.WithGalleries(galleries),
+					options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
 
@@ -400,13 +406,14 @@ var _ = Describe("API test", func() {
 			}
 
 			app, err = App(
-				options.WithContext(c),
-				options.WithAudioDir(tmpdir),
-				options.WithImageDir(tmpdir),
-				options.WithGalleries(galleries),
-				options.WithModelLoader(modelLoader),
-				options.WithBackendAssets(backendAssets),
-				options.WithBackendAssetsOutput(tmpdir),
+				append(commonOpts,
+					options.WithContext(c),
+					options.WithAudioDir(tmpdir),
+					options.WithImageDir(tmpdir),
+					options.WithGalleries(galleries),
+					options.WithModelLoader(modelLoader),
+					options.WithBackendAssets(backendAssets),
+					options.WithBackendAssetsOutput(tmpdir))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
@@ -500,7 +507,12 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())
 
 			var err error
-			app, err = App(options.WithContext(c), options.WithModelLoader(modelLoader))
+			app, err = App(
+				append(commonOpts,
+					options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
+					options.WithContext(c),
+					options.WithModelLoader(modelLoader),
+				)...)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
 
@@ -524,7 +536,7 @@ var _ = Describe("API test", func() {
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(10))
+			Expect(len(models.Models)).To(Equal(11))
 		})
 		It("can generate completions", func() {
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
@@ -555,7 +567,7 @@ var _ = Describe("API test", func() {
 		})
 
 		It("returns errors", func() {
-			backends := len(model.AutoLoadBackends)
+			backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
 			Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
@@ -602,6 +614,36 @@ var _ = Describe("API test", func() {
 			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
 		})
 
+		Context("External gRPC calls", func() {
+			It("calculate embeddings with huggingface", func() {
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+				resp, err := client.CreateEmbeddings(
+					context.Background(),
+					openai.EmbeddingRequest{
+						Model: openai.AdaCodeSearchCode,
+						Input: []string{"sun", "cat"},
+					},
+				)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
+				Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
+
+				sunEmbedding := resp.Data[0].Embedding
+				resp2, err := client.CreateEmbeddings(
+					context.Background(),
+					openai.EmbeddingRequest{
+						Model: openai.AdaCodeSearchCode,
+						Input: []string{"sun"},
+					},
+				)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
+				Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
+			})
+		})
+
 		Context("backends", func() {
 			It("runs rwkv completion", func() {
 				if runtime.GOOS != "linux" {
@@ -674,7 +716,12 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())
 
 			var err error
-			app, err = App(options.WithContext(c), options.WithModelLoader(modelLoader), options.WithConfigFile(os.Getenv("CONFIG_FILE")))
+			app, err = App(
+				append(commonOpts,
+					options.WithContext(c),
+					options.WithModelLoader(modelLoader),
+					options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
+			)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
 
@@ -696,7 +743,7 @@ var _ = Describe("API test", func() {
 		It("can generate chat completions from config file", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(12))
+			Expect(len(models.Models)).To(Equal(13))
 		})
 		It("can generate chat completions from config file", func() {
 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})

diff --git a/api/backend/embeddings.go b/api/backend/embeddings.go
@@ -30,6 +30,10 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 		model.WithContext(o.Context),
 	}
 
+	for k, v := range o.ExternalGRPCBackends {
+		opts = append(opts, model.WithExternalBackend(k, v))
+	}
+
 	if c.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {

diff --git a/api/backend/image.go b/api/backend/image.go
@@ -15,12 +15,20 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
 	}
 
-	inferenceModel, err := loader.BackendLoader(
+	opts := []model.Option{
 		model.WithBackendString(c.Backend),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithContext(o.Context),
 		model.WithModelFile(c.ImageGenerationAssets),
+	}
+
+	for k, v := range o.ExternalGRPCBackends {
+		opts = append(opts, model.WithExternalBackend(k, v))
+	}
+
+	inferenceModel, err := loader.BackendLoader(
+		opts...,
 	)
 	if err != nil {
 		return nil, err

diff --git a/api/backend/llm.go b/api/backend/llm.go
@@ -1,14 +1,17 @@
 package backend
 
 import (
+	"os"
 	"regexp"
 	"strings"
 	"sync"
 
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/utils"
 )
 
 func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
@@ -27,12 +30,32 @@ func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *opt
 		model.WithContext(o.Context),
 	}
 
+	for k, v := range o.ExternalGRPCBackends {
+		opts = append(opts, model.WithExternalBackend(k, v))
+	}
+
+	if c.Backend != "" {
+		opts = append(opts, model.WithBackendString(c.Backend))
+	}
+
+	// Check if the modelFile exists, if it doesn't try to load it from the gallery
+	if o.AutoloadGalleries { // experimental
+		if _, err := os.Stat(modelFile); os.IsNotExist(err) {
+			utils.ResetDownloadTimers()
+			// if we failed to load the model, we try to download it
+			err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
 	if c.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {
-		opts = append(opts, model.WithBackendString(c.Backend))
 		inferenceModel, err = loader.BackendLoader(opts...)
 	}
+
 	if err != nil {
 		return nil, err
 	}
@@ -50,6 +73,9 @@ func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *opt
 			return ss, err
 		} else {
 			reply, err := inferenceModel.Predict(o.Context, opts)
+			if err != nil {
+				return "", err
+			}
 			return reply.Message, err
 		}
 	}

diff --git a/api/backend/transcript.go b/api/backend/transcript.go
@@ -0,0 +1,42 @@
+package backend
+
+import (
+	"context"
+	"fmt"
+
+	config "github.com/go-skynet/LocalAI/api/config"
+
+	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
+	opts := []model.Option{
+		model.WithBackendString(model.WhisperBackend),
+		model.WithModelFile(c.Model),
+		model.WithContext(o.Context),
+		model.WithThreads(uint32(c.Threads)),
+		model.WithAssetDir(o.AssetsDestination),
+	}
+
+	for k, v := range o.ExternalGRPCBackends {
+		opts = append(opts, model.WithExternalBackend(k, v))
+	}
+
+	whisperModel, err := o.Loader.BackendLoader(opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	if whisperModel == nil {
+		return nil, fmt.Errorf("could not load whisper model")
+	}
+
+	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
+		Dst:      audio,
+		Language: language,
+		Threads:  uint32(c.Threads),
+	})
+}