Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add tts with go-piper #649

Merged
merged 1 commit into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 29 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ FROM golang:$GO_VERSION as requirements
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG SPDLOG_VERSION="1.11.0"
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ARG GO_TAGS="stablediffusion tts"

RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch
Expand All @@ -23,26 +28,46 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}

WORKDIR /build

# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev

# Stable Diffusion requirements
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2

# piper requirements
# Use pre-compiled Piper phonemization library (includes onnxruntime)
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
tar -xzvf - && \
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
cd "spdlog-${SPDLOG_VERSION}/build" && \
cmake .. && \
make -j8 && \
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
cd /build && \
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz" | \
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
# \
# ; fi

FROM requirements as builder

ARG GO_TAGS=stablediffusion
ARG GO_TAGS="stablediffusion tts"

ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all

WORKDIR /build

COPY . .
RUN make build
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build

FROM requirements

Expand Down
33 changes: 29 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=f5a8c45396741470583f59b916a2a7641e63bcd0
WHISPER_CPP_VERSION?=57543c169e27312e7546d07ed0d8c6eb806ebc36
BERT_VERSION?=6069103f54b9969c02e789d0fb12a23bd614285f
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
export BUILD_TYPE?=
CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
GO_TAGS?=
BUILD_ID?=git
LD_FLAGS=?=
LD_FLAGS?=
OPTIONAL_TARGETS?=
ESPEAK_DATA?=

OS := $(shell uname -s)
ARCH := $(shell uname -m)
Expand All @@ -30,7 +32,7 @@ CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-piper:$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz

ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
Expand All @@ -55,10 +57,15 @@ ifeq ($(STATIC),true)
LD_FLAGS=-linkmode external -extldflags -static
endif

ifeq ($(GO_TAGS),stablediffusion)
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
endif

ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
endif

.PHONY: all test build vendor

all: help
Expand All @@ -82,6 +89,10 @@ gpt4all:
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +

## go-piper
go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1

## BERT embeddings
go-bert:
Expand Down Expand Up @@ -133,6 +144,14 @@ backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true

backend-assets/espeak-ng-data:
mkdir -p backend-assets/espeak-ng-data
ifdef ESPEAK_DATA
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
else
@touch backend-assets/espeak-ng-data/keep
endif

gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a

Expand Down Expand Up @@ -172,6 +191,9 @@ go-llama:
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a

# Builds the piper static binding (and the go-piper example binary) by
# delegating to the Makefile inside the go-piper checkout. The checkout is
# listed as a prerequisite, like the other backend archive targets, so the
# sources are cloned before make descends into the directory.
go-piper/libpiper_binding.a: go-piper
	$(MAKE) -C go-piper libpiper_binding.a example/main

replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
Expand All @@ -181,8 +203,9 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper

prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
prepare-sources: go-llama go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download

## GENERIC
Expand All @@ -195,6 +218,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) -C go-piper clean
$(MAKE) build

prepare: prepare-sources backend-assets/gpt4all $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
Expand All @@ -210,6 +234,7 @@ clean: ## Remove build related file
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf ./go-piper
rm -rf $(BINARY_NAME)
rm -rf release/

Expand Down
5 changes: 5 additions & 0 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func App(opts ...AppOption) (*fiber.App, error) {

// audio
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
app.Post("/tts", ttsEndpoint(cm, options))

// images
app.Post("/v1/images/generations", imageEndpoint(cm, options))
Expand All @@ -136,6 +137,10 @@ func App(opts ...AppOption) (*fiber.App, error) {
app.Static("/generated-images", options.imageDir)
}

if options.audioDir != "" {
app.Static("/generated-audio", options.audioDir)
}

ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
Expand Down
78 changes: 78 additions & 0 deletions api/localai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package api

import (
"fmt"
"os"
"path/filepath"

model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/tts"
"github.com/go-skynet/LocalAI/pkg/utils"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
)

// TTSRequest is the request body parsed by the text-to-speech endpoint.
type TTSRequest struct {
// Model is the model name; the endpoint hands it to the backend loader and
// joins it onto the loader's model path to locate the model file.
Model string `json:"model" yaml:"model"`
// Input is the text passed to the backend for synthesis.
Input string `json:"input" yaml:"input"`
}

// generateUniqueFileName returns a file name (not a full path) for which
// os.Stat reports no existing entry inside dir. The first candidate is
// baseName+ext; when that is taken the candidates continue as
// baseName_2+ext, baseName_3+ext, and so on.
//
// The name is only checked, not reserved: a concurrent caller can obtain the
// same name until one of them actually creates the file.
func generateUniqueFileName(dir, baseName, ext string) string {
	fileName := baseName + ext

	for counter := 2; ; counter++ {
		if _, err := os.Stat(filepath.Join(dir, fileName)); err != nil {
			// A not-exist error means the name is free. Any other stat
			// failure (dir is not a directory, permission denied, ...) would
			// repeat for every further candidate, so stop probing instead of
			// looping forever and let the caller's file creation surface the
			// underlying problem.
			return fileName
		}

		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
	}
}

// ttsEndpoint returns the fiber handler for the text-to-speech route.
//
// The handler parses a TTSRequest from the request body, checks the requested
// model name against the loader's model directory, loads the model through
// the piper backend, synthesizes the input text into a fresh .wav file under
// o.audioDir and sends that file back with c.Download. Any failure along the
// way is returned as the handler error.
func ttsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		input := new(TTSRequest)
		// Get input data from the request body
		if err := c.BodyParser(input); err != nil {
			return err
		}

		// input.Model is client-supplied, so validate it before anything is
		// loaded from disk. The model name itself is passed, not the joined
		// path: joining the base path a second time would fold an escaping
		// path such as "../../etc/passwd" back under the base and let it
		// pass the check.
		// NOTE(review): assumes utils.VerifyPath joins its first argument
		// onto the base path itself — confirm against pkg/utils.
		if err := utils.VerifyPath(input.Model, o.loader.ModelPath); err != nil {
			return err
		}
		modelPath := filepath.Join(o.loader.ModelPath, input.Model)

		piperModel, err := o.loader.BackendLoader(model.PiperBackend, input.Model, []llama.ModelOption{}, uint32(0), o.assetsDestination)
		if err != nil {
			return err
		}

		if piperModel == nil {
			return fmt.Errorf("could not load piper model")
		}

		w, ok := piperModel.(*tts.Piper)
		if !ok {
			// w is nil when the assertion fails; report the value the loader
			// actually returned.
			return fmt.Errorf("loader returned non-piper object %+v", piperModel)
		}

		if err := os.MkdirAll(o.audioDir, 0755); err != nil {
			return err
		}

		fileName := generateUniqueFileName(o.audioDir, "piper", ".wav")
		filePath := filepath.Join(o.audioDir, fileName)

		if err := w.TTS(input.Input, modelPath, filePath); err != nil {
			return err
		}

		return c.Download(filePath)
	}
}
7 changes: 7 additions & 0 deletions api/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Option struct {
f16 bool
debug, disableMessage bool
imageDir string
audioDir string
cors bool
preloadJSONModels string
preloadModelsFromPath string
Expand Down Expand Up @@ -130,6 +131,12 @@ func WithDisableMessage(disableMessage bool) AppOption {
}
}

// WithAudioDir builds an AppOption that stores audioDir in the audioDir
// field of the Option it is applied to.
func WithAudioDir(audioDir string) AppOption {
	setAudioDir := func(opts *Option) {
		opts.audioDir = audioDir
	}
	return setAudioDir
}

func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.imageDir = imageDir
Expand Down
2 changes: 1 addition & 1 deletion entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cd /build

if [ "$REBUILD" != "false" ]; then
rm -rf ./local-ai
make build
ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
fi

./local-ai "$@"
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/go-skynet/LocalAI

go 1.19
go 1.20

require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674
Expand Down Expand Up @@ -52,6 +52,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mudler/go-piper v0.0.0-00010101000000-000000000000 // indirect
github.com/otiai10/mint v1.5.1 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
Expand Down
9 changes: 8 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,13 @@ func main() {
Name: "image-path",
Usage: "Image directory",
EnvVars: []string{"IMAGE_PATH"},
Value: "",
Value: "/tmp/generated/images",
},
&cli.StringFlag{
Name: "audio-path",
Usage: "audio directory",
EnvVars: []string{"AUDIO_PATH"},
Value: "/tmp/generated/audio",
},
&cli.StringFlag{
Name: "backend-assets-path",
Expand Down Expand Up @@ -125,6 +131,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
api.WithContextSize(ctx.Int("context-size")),
api.WithDebug(ctx.Bool("debug")),
api.WithImageDir(ctx.String("image-path")),
api.WithAudioDir(ctx.String("audio-path")),
api.WithF16(ctx.Bool("f16")),
api.WithDisableMessage(false),
api.WithCors(ctx.Bool("cors")),
Expand Down
22 changes: 4 additions & 18 deletions pkg/gallery/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"

"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/imdario/mergo"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
Expand Down Expand Up @@ -80,21 +81,6 @@ func ReadConfigFile(filePath string) (*Config, error) {
return &config, nil
}

// inTrustedRoot reports whether trustedRoot is one of the ancestor
// directories of path. It walks upwards with filepath.Dir and returns nil as
// soon as an ancestor equals trustedRoot; otherwise it returns an error.
// The comparison is a plain string match, so both arguments are expected in
// the same cleaned form.
func inTrustedRoot(path string, trustedRoot string) error {
	for path != "/" {
		parent := filepath.Dir(path)
		if parent == trustedRoot {
			return nil
		}
		if parent == path {
			// filepath.Dir has reached a fixed point that is not "/" (for
			// example "." for a relative path). Without this check the walk
			// would never terminate.
			break
		}
		path = parent
	}
	return fmt.Errorf("path is outside of trusted root")
}

// verifyPath resolves path relative to basePath and checks, via
// inTrustedRoot, that the cleaned result still lies below basePath.
func verifyPath(path, basePath string) error {
	joined := filepath.Join(basePath, path)
	resolved := filepath.Clean(joined)
	return inTrustedRoot(resolved, basePath)
}

func Apply(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
// Create base path if it doesn't exist
err := os.MkdirAll(basePath, 0755)
Expand All @@ -110,7 +96,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
for _, file := range config.Files {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

if err := verifyPath(file.Filename, basePath); err != nil {
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
return err
}
// Create file path
Expand Down Expand Up @@ -196,7 +182,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st

// Write prompt template contents to separate files
for _, template := range config.PromptTemplates {
if err := verifyPath(template.Name+".tmpl", basePath); err != nil {
if err := utils.VerifyPath(template.Name+".tmpl", basePath); err != nil {
return err
}
// Create file path
Expand All @@ -221,7 +207,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
name = nameOverride
}

if err := verifyPath(name+".yaml", basePath); err != nil {
if err := utils.VerifyPath(name+".yaml", basePath); err != nil {
return err
}

Expand Down