Commit
Merge pull request #589 from quarkiverse/ollama-token-opt
Make token limit configuration for Ollama optional
geoand committed May 16, 2024
2 parents 62c20d5 + 66e3285 commit 369ac9b
Showing 6 changed files with 47 additions and 20 deletions.
@@ -195,7 +195,7 @@ ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_OLLAMA_CHAT_MODEL_NUM_PREDICT+++`
endif::add-copy-button-to-env-var[]
--|int
-|`128`
+|


a| [[quarkus-langchain4j-ollama_quarkus-langchain4j-ollama-chat-model-stop]]`link:#quarkus-langchain4j-ollama_quarkus-langchain4j-ollama-chat-model-stop[quarkus.langchain4j.ollama.chat-model.stop]`
@@ -598,7 +598,7 @@ ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_OLLAMA__MODEL_NAME__CHAT_MODEL_NUM_PREDICT+++`
endif::add-copy-button-to-env-var[]
--|int
-|`128`
+|


a| [[quarkus-langchain4j-ollama_quarkus-langchain4j-ollama-model-name-chat-model-stop]]`link:#quarkus-langchain4j-ollama_quarkus-langchain4j-ollama-model-name-chat-model-stop[quarkus.langchain4j.ollama."model-name".chat-model.stop]`
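With this change, `num-predict` no longer defaults to 128: when the property is left unset, no value is sent to Ollama and the model's own default applies. A hypothetical `application.properties` entry to restore the previous behavior:

    # optional as of this PR; formerly defaulted to 128
    quarkus.langchain4j.ollama.chat-model.num-predict=128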
17 changes: 17 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j.adoc
@@ -46,6 +46,23 @@ endif::add-copy-button-to-env-var[]
|`11434`


+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j_quarkus-langchain4j-devservices-preload]]`link:#quarkus-langchain4j_quarkus-langchain4j-devservices-preload[quarkus.langchain4j.devservices.preload]`
+
+
+[.description]
+--
+Instructs Ollama to preload a model in order to get faster response times
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_DEVSERVICES_PRELOAD+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_DEVSERVICES_PRELOAD+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`true`
+
+
a| [[quarkus-langchain4j_quarkus-langchain4j-log-requests]]`link:#quarkus-langchain4j_quarkus-langchain4j-log-requests[quarkus.langchain4j.log-requests]`


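The new Dev Services flag defaults to `true`. To skip model preloading, one would set (a sketch based on the property documented above):

    # disable Ollama model preloading in Quarkus Dev Services
    quarkus.langchain4j.devservices.preload=false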
@@ -3,7 +3,7 @@
public record ChatResponse(String model, String createdAt, Message message, Boolean done, Integer promptEvalCount,
Integer evalCount) {

-public static ChatResponse emptyDone() {
+public static ChatResponse emptyNotDone() {
return new ChatResponse(null, null, new Message(Role.ASSISTANT, "", null), true, null, null);
}
}
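The rename reflects the placeholder's new role: it is returned while a partial chunk is still being buffered (see the interceptor change below), not when the stream has finished. A minimal sketch of how a stream consumer might skip such placeholders (hypothetical handler code, not part of this PR; it assumes `Message` exposes a `content()` accessor):

    // Hypothetical consumer: ignore empty placeholder responses emitted while buffering
    void onChunk(ChatResponse response) {
        String content = response.message() == null ? null : response.message().content();
        if (content == null || content.isEmpty()) {
            return; // placeholder from ChatResponse.emptyNotDone(); nothing to emit yet
        }
        handleToken(content); // hypothetical downstream handler
    }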
@@ -93,18 +93,24 @@ public Object aroundReadFrom(ReaderInterceptorContext context) throws IOException
throw e;
}

-// This piece of code deals with is the case where the last message from Ollama is not sent as entire line
-// but in pieces. There is nothing we can do in this case except for returning empty responses.
-// We have to keep track of when "done": true has been recorded in order to make sure that subsequent pieces
-// are dealt with instead of throwing an exception. We keep track of this by using Vert.x duplicated context
-
-if (chunk.contains("\"done\":true")) {
-    ctx.putLocal("done", true);
-    return ChatResponse.emptyDone();
-} else {
-    if (Boolean.TRUE.equals(ctx.getLocal("done"))) {
-        return ChatResponse.emptyDone();
+// This piece of code deals with the case where a message from Ollama is not received as an entire line
+// but in pieces (my guess is that it is a Vert.x bug).
+// There is nothing we can do in this case except return empty responses and, in the meantime, buffer the pieces
+// by storing them in the Vert.x duplicated context.
+String existingBuffer = ctx.getLocal("buffer");
+if ((existingBuffer != null) && !existingBuffer.isEmpty()) {
+    if (chunk.endsWith("}")) {
+        ctx.putLocal("buffer", "");
+        String entireLine = existingBuffer + chunk;
+        return QuarkusJsonCodecFactory.SnakeCaseObjectMapperHolder.MAPPER.readValue(entireLine,
+                ChatResponse.class);
+    } else {
+        ctx.putLocal("buffer", existingBuffer + chunk);
+        return ChatResponse.emptyNotDone();
+    }
+} else {
+    ctx.putLocal("buffer", chunk);
+    return ChatResponse.emptyNotDone();
+}
}
}
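For illustration, a self-contained sketch of the buffering technique above, with the buffer held in a plain field instead of the Vert.x duplicated context (class and field names are illustrative, not from the PR):

    import java.io.IOException;

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.PropertyNamingStrategies;

    class ChunkBuffer {
        private final ObjectMapper mapper = new ObjectMapper()
                .setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
        private final StringBuilder buffer = new StringBuilder();

        // Returns a parsed response once a complete JSON line has arrived,
        // or a "not done" placeholder while pieces are still being buffered.
        ChatResponse onChunk(String chunk) throws IOException {
            buffer.append(chunk);
            // same heuristic as the interceptor: a complete JSON line ends with '}'
            if (chunk.endsWith("}")) {
                String entireLine = buffer.toString();
                buffer.setLength(0);
                return mapper.readValue(entireLine, ChatResponse.class);
            }
            return ChatResponse.emptyNotDone();
        }
    }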
@@ -36,9 +36,11 @@ public Supplier<ChatLanguageModel> chatModel(LangChain4jOllamaConfig runtimeConf
Options.Builder optionsBuilder = Options.builder()
.temperature(chatModelConfig.temperature())
.topK(chatModelConfig.topK())
-.topP(chatModelConfig.topP())
-.numPredict(chatModelConfig.numPredict());
+.topP(chatModelConfig.topP());

+if (chatModelConfig.numPredict().isPresent()) {
+    optionsBuilder.numPredict(chatModelConfig.numPredict().getAsInt());
+}
if (chatModelConfig.stop().isPresent()) {
optionsBuilder.stop(chatModelConfig.stop().get());
}
@@ -123,9 +125,11 @@ public Supplier<StreamingChatLanguageModel> streamingChatModel(LangChain4jOllama
Options.Builder optionsBuilder = Options.builder()
.temperature(chatModelConfig.temperature())
.topK(chatModelConfig.topK())
-.topP(chatModelConfig.topP())
-.numPredict(chatModelConfig.numPredict());
+.topP(chatModelConfig.topP());

+if (chatModelConfig.numPredict().isPresent()) {
+    optionsBuilder.numPredict(chatModelConfig.numPredict().getAsInt());
+}
if (chatModelConfig.stop().isPresent()) {
optionsBuilder.stop(chatModelConfig.stop().get());
}
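Because `numPredict()` now returns `OptionalInt`, the guard used in both the blocking and streaming builders could also be written more compactly (an equivalent sketch, not the code the commit uses):

    chatModelConfig.numPredict().ifPresent(optionsBuilder::numPredict);

The commit's explicit `isPresent()`/`getAsInt()` form is equivalent and mirrors the neighboring `stop()` check.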
@@ -2,6 +2,7 @@

import java.util.List;
import java.util.Optional;
+import java.util.OptionalInt;

import io.quarkus.runtime.annotations.ConfigDocDefault;
import io.quarkus.runtime.annotations.ConfigGroup;
@@ -20,8 +21,7 @@ public interface ChatModelConfig {
/**
* Maximum number of tokens to predict when generating text
*/
-@WithDefault("128")
-Integer numPredict();
+OptionalInt numPredict();

/**
* Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return
