Skip to content

Commit

Permalink
feat: add tts with go-piper
Browse files Browse the repository at this point in the history
Signed-off-by: mudler <mudler@localai.io>
  • Loading branch information
mudler committed Jun 22, 2023
1 parent cc31c58 commit 1714a2a
Show file tree
Hide file tree
Showing 14 changed files with 235 additions and 27 deletions.
33 changes: 29 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ FROM golang:$GO_VERSION as requirements
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG SPDLOG_VERSION="1.11.0"
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ARG GO_TAGS="stablediffusion tts"

RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch
Expand All @@ -23,26 +28,46 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}

WORKDIR /build

# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev

# Stable Diffusion requirements
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2

# piper requirements
# Use pre-compiled Piper phonemization library (includes onnxruntime)
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
tar -xzvf - && \
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
cd "spdlog-${SPDLOG_VERSION}/build" && \
cmake .. && \
make -j8 && \
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
cd /build && \
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz" | \
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
# \
# ; fi

FROM requirements as builder

ARG GO_TAGS=stablediffusion
ARG GO_TAGS="stablediffusion tts"

ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all

WORKDIR /build

COPY . .
RUN make build
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build

FROM requirements

Expand Down
33 changes: 29 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=f5a8c45396741470583f59b916a2a7641e63bcd0
WHISPER_CPP_VERSION?=57543c169e27312e7546d07ed0d8c6eb806ebc36
BERT_VERSION?=6069103f54b9969c02e789d0fb12a23bd614285f
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
export BUILD_TYPE?=
CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
GO_TAGS?=
BUILD_ID?=git
LD_FLAGS=?=
LD_FLAGS?=
OPTIONAL_TARGETS?=
ESPEAK_DATA?=

OS := $(shell uname -s)
ARCH := $(shell uname -m)
Expand All @@ -30,7 +32,7 @@ CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-piper:$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz

ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
Expand All @@ -55,10 +57,15 @@ ifeq ($(STATIC),true)
LD_FLAGS=-linkmode external -extldflags -static
endif

ifeq ($(GO_TAGS),stablediffusion)
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
endif

ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
endif

.PHONY: all test build vendor

all: help
Expand All @@ -82,6 +89,10 @@ gpt4all:
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +

## go-piper
go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1

## BERT embeddings
go-bert:
Expand Down Expand Up @@ -133,6 +144,14 @@ backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true

backend-assets/espeak-ng-data:
mkdir -p backend-assets/espeak-ng-data
ifdef ESPEAK_DATA
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
else
@touch backend-assets/espeak-ng-data/keep
endif

gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a

Expand Down Expand Up @@ -172,6 +191,9 @@ go-llama:
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a

go-piper/libpiper_binding.a:
$(MAKE) -C go-piper libpiper_binding.a example/main

replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
Expand All @@ -181,8 +203,9 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper

prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
prepare-sources: go-llama go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download

## GENERIC
Expand All @@ -195,6 +218,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) -C go-piper clean
$(MAKE) build

prepare: prepare-sources backend-assets/gpt4all $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
Expand All @@ -210,6 +234,7 @@ clean: ## Remove build related file
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf ./go-piper
rm -rf $(BINARY_NAME)
rm -rf release/

Expand Down
5 changes: 5 additions & 0 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func App(opts ...AppOption) (*fiber.App, error) {

// audio
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
app.Post("/tts", ttsEndpoint(cm, options))

// images
app.Post("/v1/images/generations", imageEndpoint(cm, options))
Expand All @@ -136,6 +137,10 @@ func App(opts ...AppOption) (*fiber.App, error) {
app.Static("/generated-images", options.imageDir)
}

if options.audioDir != "" {
app.Static("/generated-audio", options.audioDir)
}

ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
Expand Down
78 changes: 78 additions & 0 deletions api/localai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package api

import (
"fmt"
"os"
"path/filepath"

model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/tts"
"github.com/go-skynet/LocalAI/pkg/utils"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
)

type TTSRequest struct {
Model string `json:"model" yaml:"model"`
Input string `json:"input" yaml:"input"`
}

func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext

for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}

counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}

func ttsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {

input := new(TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}

piperModel, err := o.loader.BackendLoader(model.PiperBackend, input.Model, []llama.ModelOption{}, uint32(0), o.assetsDestination)
if err != nil {
return err
}

if piperModel == nil {
return fmt.Errorf("could not load piper model")
}

w, ok := piperModel.(*tts.Piper)
if !ok {
return fmt.Errorf("loader returned non-piper object %+v", w)
}

if err := os.MkdirAll(o.audioDir, 0755); err != nil {
return err
}

fileName := generateUniqueFileName(o.audioDir, "piper", ".wav")
filePath := filepath.Join(o.audioDir, fileName)

modelPath := filepath.Join(o.loader.ModelPath, input.Model)

if err := utils.VerifyPath(modelPath, o.loader.ModelPath); err != nil {
return err
}

if err := w.TTS(input.Input, modelPath, filePath); err != nil {
return err
}

return c.Download(filePath)
}
}
7 changes: 7 additions & 0 deletions api/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Option struct {
f16 bool
debug, disableMessage bool
imageDir string
audioDir string
cors bool
preloadJSONModels string
preloadModelsFromPath string
Expand Down Expand Up @@ -130,6 +131,12 @@ func WithDisableMessage(disableMessage bool) AppOption {
}
}

func WithAudioDir(audioDir string) AppOption {
return func(o *Option) {
o.audioDir = audioDir
}
}

func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.imageDir = imageDir
Expand Down
2 changes: 1 addition & 1 deletion entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cd /build

if [ "$REBUILD" != "false" ]; then
rm -rf ./local-ai
make build
ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
fi

./local-ai "$@"
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/go-skynet/LocalAI

go 1.19
go 1.20

require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674
Expand Down Expand Up @@ -52,6 +52,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mudler/go-piper v0.0.0-00010101000000-000000000000 // indirect
github.com/otiai10/mint v1.5.1 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
Expand Down
9 changes: 8 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,13 @@ func main() {
Name: "image-path",
Usage: "Image directory",
EnvVars: []string{"IMAGE_PATH"},
Value: "",
Value: "/tmp/generated/images",
},
&cli.StringFlag{
Name: "audio-path",
Usage: "audio directory",
EnvVars: []string{"AUDIO_PATH"},
Value: "/tmp/generated/audio",
},
&cli.StringFlag{
Name: "backend-assets-path",
Expand Down Expand Up @@ -125,6 +131,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
api.WithContextSize(ctx.Int("context-size")),
api.WithDebug(ctx.Bool("debug")),
api.WithImageDir(ctx.String("image-path")),
api.WithAudioDir(ctx.String("audio-path")),
api.WithF16(ctx.Bool("f16")),
api.WithDisableMessage(false),
api.WithCors(ctx.Bool("cors")),
Expand Down
18 changes: 2 additions & 16 deletions pkg/gallery/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"

"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/imdario/mergo"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
Expand Down Expand Up @@ -80,21 +81,6 @@ func ReadConfigFile(filePath string) (*Config, error) {
return &config, nil
}

func inTrustedRoot(path string, trustedRoot string) error {
for path != "/" {
path = filepath.Dir(path)
if path == trustedRoot {
return nil
}
}
return fmt.Errorf("path is outside of trusted root")
}

func verifyPath(path, basePath string) error {
c := filepath.Clean(filepath.Join(basePath, path))
return inTrustedRoot(c, basePath)
}

func Apply(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
// Create base path if it doesn't exist
err := os.MkdirAll(basePath, 0755)
Expand All @@ -110,7 +96,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
for _, file := range config.Files {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

if err := verifyPath(file.Filename, basePath); err != nil {
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
return err
}
// Create file path
Expand Down

0 comments on commit 1714a2a

Please sign in to comment.