
Commit

wip
mudler committed Nov 8, 2023
1 parent f0e265a commit 7402164
Showing 21 changed files with 483 additions and 336 deletions.
3 changes: 2 additions & 1 deletion api/backend/llm.go
@@ -26,7 +26,7 @@ type TokenUsage struct {
Completion int
}

func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model

grpcOpts := gRPCModelOpts(c)
@@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts.Prompt = s
opts.Images = images

tokenUsage := TokenUsage{}

1 change: 1 addition & 0 deletions api/backend/options.go
@@ -45,6 +45,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
MMProj: c.MMProj,
LoraAdapter: c.LoraAdapter,
LoraBase: c.LoraBase,
NGQA: c.NGQA,
1 change: 1 addition & 0 deletions api/config/config.go
@@ -104,6 +104,7 @@ type LLMConfig struct {
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
}

type AutoGPTQ struct {
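
A minimal sketch (not LocalAI's actual config loader) of how the new mmproj key would be read from a model YAML file, assuming it maps exactly as the yaml tag in this hunk suggests. The struct mirrors only the fields touched in this diff, and the file contents and names are invented:

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

// llmConfig is a local stand-in for the LLMConfig fields shown in this diff.
type llmConfig struct {
	Quantization string `yaml:"quantization"`
	MMProj       string `yaml:"mmproj"` // multimodal projector file for LLaVA-style models
}

func main() {
	raw := []byte("quantization: q4_0\nmmproj: mmproj-model-f16.gguf\n")

	var cfg llmConfig
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.MMProj) // prints: mmproj-model-f16.gguf
}
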
18 changes: 13 additions & 5 deletions api/openai/chat.go
@@ -81,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}

if input.ResponseFormat == "json_object" {
input.Grammar = grammar.JSONBNF
}

// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
@@ -140,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
}
}
r := config.Roles[role]
contentExists := i.Content != nil && *i.Content != ""
contentExists := i.Content != nil && i.StringContent != ""
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: *i.Content,
Content: i.StringContent,
MessageIndex: messageIndex,
}
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
@@ -166,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, " ", *i.Content)
content = fmt.Sprint(r, " ", i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
@@ -180,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
}
} else {
if contentExists {
content = fmt.Sprint(*i.Content)
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
@@ -334,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
// Note: This costs (in terms of CPU) another computation
config.Grammar = ""
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
images := []string{}
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
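
For reference, a client-side sketch of a request that exercises the new response_format branch above. It is not part of the commit: the endpoint URL, port, and model name are placeholders, and response_format is sent as a plain string because that is what the string comparison in this hunk expects.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder model name and local endpoint; adjust to your deployment.
	body := []byte(`{
		"model": "my-model",
		"response_format": "json_object",
		"messages": [{"role": "user", "content": "Reply with a JSON object listing three colors."}]
	}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	// With response_format set, ChatEndpoint switches the grammar to grammar.JSONBNF,
	// so the completion should be constrained to valid JSON.
	fmt.Println(string(out))
}
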
5 changes: 5 additions & 0 deletions api/openai/completion.go
@@ -12,6 +12,7 @@ import (
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -64,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
return fmt.Errorf("failed reading parameters from request:%w", err)
}

if input.ResponseFormat == "json_object" {
input.Grammar = grammar.JSONBNF
}

log.Debug().Msgf("Parameter Config: %+v", config)

if input.Stream {
7 changes: 6 additions & 1 deletion api/openai/inference.go
@@ -23,8 +23,13 @@ func ComputeChoices(
n = 1
}

images := []string{}
for _, m := range req.Messages {
images = append(images, m.StringImages...)
}

// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}
63 changes: 63 additions & 0 deletions api/openai/request.go
@@ -2,8 +2,11 @@ package openai

import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
@@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
return modelFile, input, nil
}

// getBase64Image checks whether the string is a URL; if it is, it downloads the image
// into memory, encodes it in base64, and returns the base64 string
func getBase64Image(s string) (string, error) {
if strings.HasPrefix(s, "http") {
// download the image
resp, err := http.Get(s)
if err != nil {
return "", err
}
defer resp.Body.Close()

// read the image data into memory
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}

// encode the image data in base64
encoded := base64.StdEncoding.EncodeToString(data)

// return the base64 string
return encoded, nil
}

// if the string is instead prefixed with "data:image/jpeg;base64,", strip the prefix and return the rest
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
}
return "", fmt.Errorf("not valid string")
}

func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
@@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
}
}

// Decode each request's message content
index := 0
for i, m := range input.Messages {
switch content := m.Content.(type) {
case string:
// assign through the slice index so the value survives the loop (range yields a copy)
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
for _, pp := range c {
if pp.Type == "text" {
input.Messages[i].StringContent = pp.Text
} else if pp.Type == "image_url" {
// pp.ImageURL is either an URL (downloaded and encoded) or a base64 data URI (prefix stripped)
base64, err := getBase64Image(pp.ImageURL)
if err == nil {
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 data
// set a placeholder for each image
input.Messages[i].StringContent += fmt.Sprintf("[img-%d]", index)
index++
} else {
fmt.Print("Failed encoding image", err)
}
}
}
}
}

if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
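
To illustrate the decoding added above, here is a self-contained sketch that mirrors (rather than imports) the schema types and runs the same type switch over the two supported content shapes: a plain string, and a list of typed parts. Note that image_url is a plain string here, matching the Content struct introduced in this commit; the sample payload and URL are invented.

package main

import (
	"encoding/json"
	"fmt"
)

// Local stand-ins for schema.Content and schema.Message.
type contentPart struct {
	Type     string `json:"type"`
	Text     string `json:"text"`
	ImageURL string `json:"image_url"`
}

type message struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"`
}

func main() {
	raw := []byte(`[
		{"role": "user", "content": "plain text message"},
		{"role": "user", "content": [
			{"type": "text", "text": "What is in this picture?"},
			{"type": "image_url", "image_url": "https://example.com/cat.jpg"}
		]}
	]`)

	var msgs []message
	if err := json.Unmarshal(raw, &msgs); err != nil {
		panic(err)
	}

	index := 0
	for _, m := range msgs {
		switch c := m.Content.(type) {
		case string:
			fmt.Println("text:", c)
		case []interface{}:
			dat, _ := json.Marshal(c)
			parts := []contentPart{}
			json.Unmarshal(dat, &parts)
			for _, p := range parts {
				switch p.Type {
				case "text":
					fmt.Println("text:", p.Text)
				case "image_url":
					// In the real handler this is where getBase64Image runs and a
					// "[img-N]" placeholder is appended to the prompt text.
					fmt.Printf("image %d: %s (placeholder [img-%d])\n", index, p.ImageURL, index)
					index++
				}
			}
		}
	}
}
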
12 changes: 11 additions & 1 deletion api/schema/openai.go
@@ -55,11 +55,21 @@ type Choice struct {
Text string `json:"text,omitempty"`
}

type Content struct {
Type string `json:"type" yaml:"type"`
Text string `json:"text" yaml:"text"`
ImageURL string `json:"image_url" yaml:"image_url"`
}

type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message content
Content *string `json:"content" yaml:"content"`
Content interface{} `json:"content" yaml:"content"`

StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`

// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
}
35 changes: 19 additions & 16 deletions backend/cpp/llama/grpc-server.cpp
@@ -1,9 +1,9 @@
// llama.cpp gRPC C++ backend server
//
// Ettore Di Giacinto <mudler@localai.io>
// Ettore Di Giacinto <mudler@localai.io> and llama.cpp authors
//
// This is a gRPC server for llama.cpp compatible with the LocalAI proto
// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP,
// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP (https://github.com/ggerganov/llama.cpp/tree/master/examples/server),
// but modified to work with gRPC
//

@@ -39,7 +39,7 @@ using grpc::Status;
using backend::HealthMessage;


///// LLAMA.CPP server
///// LLAMA.CPP server code below

using json = nlohmann::json;

@@ -1809,7 +1809,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con

/////////////////////////////////
////////////////////////////////
//////// LOCALAI
//////// LOCALAI code starts below here
/////////////////////////////////
////////////////////////////////

bool loaded_model; // TODO: add a mutex for this, but it is only set once, when the model is loaded

@@ -1880,6 +1882,16 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["prompt"] = predict->prompt();
data["ignore_eos"] = predict->ignoreeos();

// for each image in the request, add the image data
//
for (int i = 0; i < predict->images_size(); i++) {
data["image_data"].push_back(json
{
{"id", i},
{"data", predict->images(i)},
});
}

data["stop"] = predict->stopprompts();
// data["n_probs"] = predict->nprobs();
//TODO: images,
@@ -1953,14 +1965,15 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// }
// }



static void params_parse(const backend::ModelOptions* request,
gpt_params & params) {

// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809

params.model = request->modelfile();
// get the directory of the model file; the mmproj path is resolved relative to it
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
if (!request->mmproj().empty()) {
params.mmproj = model_dir + "/" + request->mmproj();
}
// params.model_alias ??
params.model_alias = request->modelfile();
params.n_ctx = request->contextsize();
@@ -2071,16 +2084,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
break;
}
}
return grpc::Status::OK;


// auto on_complete = [task_id, &llama] (bool)
// {
// // cancel
// llama.request_cancel(task_id);
// };



return grpc::Status::OK;
}
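
A small sketch, kept in Go for consistency with the other examples here, of the request JSON that parse_options assembles above: one {"id", "data"} entry per image, with "data" carrying the base64 string received over gRPC and the prompt referring to it via the [img-N] placeholder set in request.go. The prompt text and base64 values are placeholders.

package main

import (
	"encoding/json"
	"fmt"
)

// imageData mirrors the {"id", "data"} objects pushed into data["image_data"] above.
type imageData struct {
	ID   int    `json:"id"`
	Data string `json:"data"` // base64-encoded image bytes
}

func main() {
	images := []string{"aGVsbG8=", "d29ybGQ="} // stand-ins for real base64 payloads

	entries := make([]imageData, 0, len(images))
	for i, img := range images {
		entries = append(entries, imageData{ID: i, Data: img})
	}

	payload := map[string]interface{}{
		"prompt":     "USER: [img-0] What is in the image? ASSISTANT:",
		"image_data": entries,
	}

	out, _ := json.MarshalIndent(payload, "", "  ")
	fmt.Println(string(out))
}
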
