Merge upstream/master into branch

Signed-off-by: Ludovic LEROUX <ludovic@inpher.io>
mudler · Apr 11, 2024 · bde6f6e · bde6f6e
2 parents 7be926e + 0a6956b
commit bde6f6e
Show file tree

Hide file tree

Showing 20 changed files with 651 additions and 602 deletions.
diff --git a/.env b/.env
@@ -1,33 +1,33 @@
 ## Set number of threads.
 ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
-# THREADS=14
+# LOCALAI_THREADS=14
 
 ## Specify a different bind address (defaults to ":8080")
-# ADDRESS=127.0.0.1:8080
+# LOCALAI_ADDRESS=127.0.0.1:8080
 
 ## Default models context size
-# CONTEXT_SIZE=512
+# LOCALAI_CONTEXT_SIZE=512
 #
 ## Define galleries.
 ## Models available to install will be visible in `/models/available`
-# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
+# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
 
 ## CORS settings
-# CORS=true
-# CORS_ALLOW_ORIGINS=*
+# LOCALAI_CORS=true
+# LOCALAI_CORS_ALLOW_ORIGINS=*
 
 ## Default path for models
 #
-# MODELS_PATH=/models
+# LOCALAI_MODELS_PATH=/models
 
 ## Enable debug mode
-# DEBUG=true
+# LOCALAI_LOG_LEVEL=debug
 
 ## Disables COMPEL (Diffusers)
 # COMPEL=0
 
 ## Enable/Disable single backend (useful if only one GPU is available)
-# SINGLE_ACTIVE_BACKEND=true
+# LOCALAI_SINGLE_ACTIVE_BACKEND=true
 
 ## Specify a build type. Available: cublas, openblas, clblas.
 ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
@@ -46,13 +46,13 @@
 # GO_TAGS=stablediffusion
 
 ## Path where to store generated images
-# IMAGE_PATH=/tmp
+# LOCALAI_IMAGE_PATH=/tmp/generated/images
 
 ## Specify a default upload limit in MB (whisper)
-# UPLOAD_LIMIT
+# LOCALAI_UPLOAD_LIMIT=15
 
 ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
-# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
+# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
 
 ### Advanced settings ###
 ### These are not used by LocalAI itself, but by other components in the stack ###
@@ -72,18 +72,18 @@
 # LLAMACPP_PARALLEL=1
 
 ### Enable to run parallel requests
-# PARALLEL_REQUESTS=true
+# LOCALAI_PARALLEL_REQUESTS=true
 
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
-# WATCHDOG_IDLE=true
-#
-# Enables watchdog to kill backends that are busy for too much time
-# WATCHDOG_BUSY=true
+# LOCALAI_WATCHDOG_IDLE=true
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered idle
-# WATCHDOG_IDLE_TIMEOUT=5m
+# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
+#
+# Enables watchdog to kill backends that are busy for too much time
+# LOCALAI_WATCHDOG_BUSY=true
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# WATCHDOG_BUSY_TIMEOUT=5m
+# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,25 @@
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+  - package-ecosystem: "gomod"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "github-actions"
+    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+    directory: "/"
+    schedule:
+      # Check for updates to GitHub Actions once a week
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    # Look for pip manifests (e.g. requirements files) starting from the repository root.
+    directory: "/"
+    schedule:
+      # Check for updates to pip dependencies once a week
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    # Look for Dockerfiles starting from the repository root.
+    directory: "/"
+    schedule:
+      # Check for updates to Docker images once a week
+      interval: "weekly"
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
@@ -49,7 +49,7 @@ jobs:
         run: |
           bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v5
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI

diff --git a/.github/workflows/bump_docs.yaml b/.github/workflows/bump_docs.yaml
@@ -17,7 +17,7 @@ jobs:
         run: |
           bash .github/bump_docs.sh ${{ matrix.repository }}
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v5
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI

diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
@@ -0,0 +1,44 @@
+# Approves pull requests opened by Dependabot and enables auto-merge on them.
+name: Dependabot auto-merge
+# Triggered when a review is requested on a pull request.
+on:
+  pull_request_target:
+    types: [review_requested]
+
+# Write access is requested for contents and pull requests; packages are read-only.
+permissions:
+  contents: write
+  pull-requests: write
+  packages: read
+
+jobs:
+  dependabot:
+    runs-on: ubuntu-latest
+    # Skip the whole job unless the triggering actor is Dependabot.
+    if: ${{ github.actor == 'dependabot[bot]' }}
+    steps:
+      # NOTE(review): the outputs of this step (id: metadata) are not referenced
+      # by any later step in this workflow - confirm whether it is still needed.
+      - name: Dependabot metadata
+        id: metadata
+        uses: dependabot/fetch-metadata@v1.3.4
+        with:
+          github-token: "${{ secrets.GITHUB_TOKEN }}"
+          skip-commit-verification: true
+
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Checks out the PR branch, then approves the PR unless its review
+      # decision is already APPROVED.
+      - name: Approve a PR if not already approved
+        run: |
+          gh pr checkout "$PR_URL"
+            if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
+          then
+            gh pr review --approve "$PR_URL"
+          else
+            echo "PR already approved.";
+          fi
+        env:
+          PR_URL: ${{github.event.pull_request.html_url}}
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+
+      # Only PRs whose title contains 'bump' get auto-merge enabled. This step
+      # authenticates with RELEASE_TOKEN instead of the default GITHUB_TOKEN.
+      - name: Enable auto-merge for Dependabot PRs
+        if: ${{ contains(github.event.pull_request.title, 'bump')}}
+        run: gh pr merge --auto --merge "$PR_URL"
+        env:
+          PR_URL: ${{github.event.pull_request.html_url}}
+          GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}}
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -33,7 +33,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
         with:
           go-version: '1.21.x'
           cache: false
@@ -55,7 +55,7 @@ jobs:
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
       - name: Cache grpc
         id: cache-grpc
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: grpc
           key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
@@ -100,7 +100,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
         with:
           go-version: '1.21.x'
           cache: false
@@ -138,7 +138,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
         with:
           go-version: '1.21.x'
           cache: false

diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
@@ -14,7 +14,7 @@ jobs:
       GO111MODULE: on
     steps:
       - name: Checkout Source
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Run Gosec Security Scanner
         uses: securego/gosec@master
         with:

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -60,7 +60,7 @@ jobs:
         with: 
           submodules: true
       - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v4
+        uses: actions/setup-go@v5
         with:
           go-version: ${{ matrix.go-version }}
           cache: false
@@ -92,7 +92,7 @@ jobs:
           GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
       - name: Cache grpc
         id: cache-grpc
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: grpc
           key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
@@ -177,7 +177,7 @@ jobs:
         with: 
           submodules: true
       - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v4
+        uses: actions/setup-go@v5
         with:
           go-version: ${{ matrix.go-version }}
           cache: false

diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea
+CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
@@ -2,6 +2,7 @@
 set -ex
 
 SKIP_CONDA=${SKIP_CONDA:-0}
+REQUIREMENTS_FILE=$1
 
 # Check if environment exist
 conda_env_exists(){
@@ -14,7 +15,7 @@ else
     export PATH=$PATH:/opt/conda/bin
     if conda_env_exists "transformers" ; then
         echo "Creating virtual environment..."
-        conda env create --name transformers --file $1
+        conda env create --name transformers --file $REQUIREMENTS_FILE
         echo "Virtual environment created."
     else 
         echo "Virtual environment already exists."
@@ -28,11 +29,16 @@ if [ -d "/opt/intel" ]; then
     pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
 fi
 
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    if [ $SKIP_CONDA -eq 0 ]; then
-        # Activate conda environment
-        source activate transformers
-    fi
+# If we didn't skip conda, activate the environment
+# to install FlashAttention
+if [ $SKIP_CONDA -eq 0 ]; then
+    source activate transformers
+fi
+if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
+    #TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
+    pip install flash-attn --no-build-isolation
+fi
 
+if [ "$PIP_CACHE_PURGE" = true ] ; then
     pip cache purge
 fi
diff --git a/core/cli/cli.go b/core/cli/cli.go
@@ -0,0 +1,20 @@
+package cli
+
+import "embed"
+
+// Context carries the global options embedded into the top-level CLI struct
+// and is the value handed to each command's Run method.
+type Context struct {
+	// Debug is the hidden, deprecated debug switch (LOCALAI_DEBUG or DEBUG);
+	// its help text directs users to --log-level=debug instead.
+	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
+	// LogLevel is read from LOCALAI_LOG_LEVEL and restricted to the enum values.
+	// It is a pointer, so presumably nil means "not set" - verify against the consumer.
+	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
+
+	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
+	BackendAssets embed.FS `kong:"-"`
+}
+
+// CLI is the root of the command-line interface: the global Context flags
+// plus one field per sub-command. Run is tagged default:"withargs", making it
+// the command used when no sub-command is named.
+var CLI struct {
+	Context `embed:""`
+
+	Run        RunCMD        `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
+	Models     ModelsCMD     `cmd:"" help:"Manage LocalAI models and definitions"`
+	TTS        TTSCMD        `cmd:"" help:"Convert text to speech"`
+	Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
+}
diff --git a/core/cli/models.go b/core/cli/models.go
@@ -0,0 +1,74 @@
+package cli
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/rs/zerolog/log"
+	"github.com/schollz/progressbar/v3"
+)
+
+// ModelsCMDFlags holds the flags shared by the models sub-commands; it is
+// embedded by both ModelsList and ModelsInstall.
+type ModelsCMDFlags struct {
+	// Galleries is a JSON-encoded list of galleries (LOCALAI_GALLERIES or GALLERIES).
+	Galleries  string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
+	// ModelsPath is the directory containing models (LOCALAI_MODELS_PATH or MODELS_PATH).
+	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+}
+
+// ModelsList is the command behind ModelsCMD.List; it takes no options beyond
+// the shared ModelsCMDFlags.
+type ModelsList struct {
+	ModelsCMDFlags `embed:""`
+}
+
+// ModelsInstall is the command behind ModelsCMD.Install.
+type ModelsInstall struct {
+	// ModelArgs are the positional arguments, exposed as "models" on the command
+	// line. They are optional, so this slice may be empty.
+	ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
+
+	ModelsCMDFlags `embed:""`
+}
+
+// ModelsCMD groups the models sub-commands. List is tagged default:"withargs",
+// making it the sub-command used when none is named.
+type ModelsCMD struct {
+	List    ModelsList    `cmd:"" help:"List the models available in your galleries" default:"withargs"`
+	Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
+}
+
+// Run prints one line per model returned for the configured galleries,
+// marking the entries whose Installed field is set.
+//
+// A gallery list that fails to decode is only logged, and the lookup still
+// proceeds with whatever was decoded. An error from the lookup itself is
+// returned to the caller.
+func (ml *ModelsList) Run(ctx *Context) error {
+	var configured []gallery.Gallery
+	parseErr := json.Unmarshal([]byte(ml.Galleries), &configured)
+	if parseErr != nil {
+		log.Error().Err(parseErr).Msg("unable to load galleries")
+	}
+
+	available, err := gallery.AvailableGalleryModels(configured, ml.ModelsPath)
+	if err != nil {
+		return err
+	}
+
+	for _, entry := range available {
+		if !entry.Installed {
+			fmt.Printf(" - %s@%s\n", entry.Gallery.Name, entry.Name)
+			continue
+		}
+		fmt.Printf(" * %s@%s (installed)\n", entry.Gallery.Name, entry.Name)
+	}
+	return nil
+}
+
+// Run installs every model named on the command line from the configured
+// galleries into ModelsPath, showing a progress bar for each one.
+//
+// It returns an error when no model name was given, or when installing any
+// model fails (the remaining models are then not attempted). A gallery list
+// that fails to decode is only logged, matching ModelsList.Run.
+func (mi *ModelsInstall) Run(ctx *Context) error {
+	// The positional argument is optional, so an empty list is reachable from
+	// the command line; report it instead of panicking on ModelArgs[0].
+	if len(mi.ModelArgs) == 0 {
+		return fmt.Errorf("no model specified")
+	}
+
+	var galleries []gallery.Gallery
+	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
+		log.Error().Err(err).Msg("unable to load galleries")
+	}
+
+	for _, modelName := range mi.ModelArgs {
+		progressBar := progressbar.NewOptions(
+			1000,
+			progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
+			progressbar.OptionShowBytes(false),
+			progressbar.OptionClearOnFinish(),
+		)
+		progressCallback := func(fileName string, current string, total string, percentage float64) {
+			// The bar has 1000 steps; presumably percentage is in the 0-100
+			// range, hence the factor of 10 - confirm against the gallery package.
+			// A failure to redraw the bar must not abort the download, so the
+			// error is deliberately discarded.
+			_ = progressBar.Set(int(percentage * 10))
+		}
+		if err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback); err != nil {
+			return fmt.Errorf("installing model %q: %w", modelName, err)
+		}
+	}
+	return nil
+}