From 9f4c0e1dc79beecd0e781784733362eb6a54aebe Mon Sep 17 00:00:00 2001 From: Mark Pollack Date: Thu, 6 Nov 2025 23:56:27 -0500 Subject: [PATCH] feat(ollama): add thinking mode support for reasoning models Add support for Ollama's thinking mode, which enables reasoning-capable models to emit their internal reasoning process in a separate field. Key changes: - Implement ThinkOption sealed interface with boolean and level variants - Add think configuration to OllamaChatOptions with builder methods - Filter think from options map to send as top-level request field - Add QWEN3_4B_THINKING model constant for thinking-enabled variant - Upgrade Ollama test container to 0.12.10 for thinking support - Document auto-enable behavior for thinking-capable models Supported models: Qwen3, DeepSeek-v3.1, DeepSeek R1, GPT-OSS. Note: Thinking-capable models auto-enable thinking by default in Ollama 0.12+. Use .disableThinking() to explicitly disable. Signed-off-by: Mark Pollack --- .../ai/ollama/OllamaChatModel.java | 4 +- .../ai/ollama/api/OllamaApi.java | 51 +++++- .../ai/ollama/api/OllamaChatOptions.java | 145 +++++++++++++--- .../ai/ollama/api/OllamaModel.java | 18 ++ .../ai/ollama/api/OllamaOptions.java | 4 +- .../ai/ollama/api/ThinkOption.java | 159 ++++++++++++++++++ .../ollama/OllamaChatModelMetadataTests.java | 113 +++++++++++++ .../ai/ollama/OllamaImage.java | 2 +- .../ai/ollama/api/OllamaApiIT.java | 88 +++++++++- .../ai/ollama/api/ThinkOptionTests.java | 146 ++++++++++++++++ .../ROOT/pages/api/chat/ollama-chat.adoc | 139 ++++++++++++++- .../api/embeddings/ollama-embeddings.adoc | 2 + 12 files changed, 842 insertions(+), 29 deletions(-) create mode 100644 models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/ThinkOption.java create mode 100644 models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java create mode 100644 models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/ThinkOptionTests.java 
diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java index 7cb87eb8f3b..330c6e69c71 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java @@ -266,6 +266,7 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon if (ollamaResponse.promptEvalCount() != null && ollamaResponse.evalCount() != null) { generationMetadata = ChatGenerationMetadata.builder() .finishReason(ollamaResponse.doneReason()) + .metadata("thinking", ollamaResponse.message().thinking()) .build(); } @@ -505,7 +506,8 @@ else if (message.getMessageType() == MessageType.TOOL) { OllamaApi.ChatRequest.Builder requestBuilder = OllamaApi.ChatRequest.builder(requestOptions.getModel()) .stream(stream) .messages(ollamaMessages) - .options(requestOptions); + .options(requestOptions) + .think(requestOptions.getThinkOption()); if (requestOptions.getFormat() != null) { requestBuilder.format(requestOptions.getFormat()); diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java index 7f3b56c7e53..8a3f847ce36 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java @@ -402,7 +402,7 @@ public record ChatRequest( @JsonProperty("keep_alive") String keepAlive, @JsonProperty("tools") List tools, @JsonProperty("options") Map options, - @JsonProperty("think") Boolean think + @JsonProperty("think") ThinkOption think ) { public static Builder builder(String model) { @@ -475,7 +475,7 @@ public static final class Builder { private String keepAlive; private List 
tools = List.of(); private Map options = Map.of(); - private Boolean think; + private ThinkOption think; public Builder(String model) { Assert.notNull(model, "The model can not be null."); @@ -514,11 +514,56 @@ public Builder options(Map options) { return this; } - public Builder think(Boolean think) { + public Builder think(ThinkOption think) { this.think = think; return this; } + /** + * Enable thinking mode for the model. + * @return this builder + */ + public Builder enableThinking() { + this.think = ThinkOption.ThinkBoolean.ENABLED; + return this; + } + + /** + * Disable thinking mode for the model. + * @return this builder + */ + public Builder disableThinking() { + this.think = ThinkOption.ThinkBoolean.DISABLED; + return this; + } + + /** + * Set thinking level to "low" (for GPT-OSS model). + * @return this builder + */ + public Builder thinkLow() { + this.think = ThinkOption.ThinkLevel.LOW; + return this; + } + + /** + * Set thinking level to "medium" (for GPT-OSS model). + * @return this builder + */ + public Builder thinkMedium() { + this.think = ThinkOption.ThinkLevel.MEDIUM; + return this; + } + + /** + * Set thinking level to "high" (for GPT-OSS model). 
+ * @return this builder + */ + public Builder thinkHigh() { + this.think = ThinkOption.ThinkLevel.HIGH; + return this; + } + @Deprecated public Builder options(OllamaOptions options) { Objects.requireNonNull(options, "The options can not be null."); diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java index 2bcbce8101a..1cc849928a7 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java @@ -52,7 +52,8 @@ @JsonInclude(Include.NON_NULL) public class OllamaChatOptions implements ToolCallingChatOptions { - private static final List NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate"); + private static final List NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate", + "think"); // Following fields are options which must be set when the model is loaded into // memory. @@ -317,6 +318,31 @@ public class OllamaChatOptions implements ToolCallingChatOptions { @JsonProperty("truncate") private Boolean truncate; + /** + * The model should think before responding, if supported. + *

+ * Most models (Qwen 3, DeepSeek-v3.1, DeepSeek R1) use boolean enable/disable. + * The GPT-OSS model requires string levels: "low", "medium", or "high". + *

+ * Default Behavior (Ollama 0.12+): + *

+ * <ul>
+ * <li>Thinking-capable models (e.g., qwen3:*-thinking, deepseek-r1, deepseek-v3.1)
+ * auto-enable thinking by default when this field is not set.</li>
+ * <li>Standard models (e.g., qwen2.5:*, llama3.2) do not enable thinking by default.</li>
+ * <li>To explicitly control behavior, use {@link Builder#enableThinking()} or
+ * {@link Builder#disableThinking()}.</li>
+ * </ul>

+ * Use {@link Builder#enableThinking()}, {@link Builder#disableThinking()}, or + * {@link Builder#thinkHigh()} to configure this option. + * + * @see ThinkOption + * @see ThinkBoolean + * @see ThinkLevel + */ + @JsonProperty("think") + private ThinkOption thinkOption; + @JsonIgnore private Boolean internalToolExecutionEnabled; @@ -364,6 +390,7 @@ public static OllamaChatOptions fromOptions(OllamaChatOptions fromOptions) { .format(fromOptions.getFormat()) .keepAlive(fromOptions.getKeepAlive()) .truncate(fromOptions.getTruncate()) + .thinkOption(fromOptions.getThinkOption()) .useNUMA(fromOptions.getUseNUMA()) .numCtx(fromOptions.getNumCtx()) .numBatch(fromOptions.getNumBatch()) @@ -745,6 +772,14 @@ public void setTruncate(Boolean truncate) { this.truncate = truncate; } + public ThinkOption getThinkOption() { + return this.thinkOption; + } + + public void setThinkOption(ThinkOption thinkOption) { + this.thinkOption = thinkOption; + } + @Override @JsonIgnore public List getToolCallbacks() { @@ -824,17 +859,17 @@ public boolean equals(Object o) { OllamaChatOptions that = (OllamaChatOptions) o; return Objects.equals(this.model, that.model) && Objects.equals(this.format, that.format) && Objects.equals(this.keepAlive, that.keepAlive) && Objects.equals(this.truncate, that.truncate) - && Objects.equals(this.useNUMA, that.useNUMA) && Objects.equals(this.numCtx, that.numCtx) - && Objects.equals(this.numBatch, that.numBatch) && Objects.equals(this.numGPU, that.numGPU) - && Objects.equals(this.mainGPU, that.mainGPU) && Objects.equals(this.lowVRAM, that.lowVRAM) - && Objects.equals(this.f16KV, that.f16KV) && Objects.equals(this.logitsAll, that.logitsAll) - && Objects.equals(this.vocabOnly, that.vocabOnly) && Objects.equals(this.useMMap, that.useMMap) - && Objects.equals(this.useMLock, that.useMLock) && Objects.equals(this.numThread, that.numThread) - && Objects.equals(this.numKeep, that.numKeep) && Objects.equals(this.seed, that.seed) - && Objects.equals(this.numPredict, 
that.numPredict) && Objects.equals(this.topK, that.topK) - && Objects.equals(this.topP, that.topP) && Objects.equals(this.minP, that.minP) - && Objects.equals(this.tfsZ, that.tfsZ) && Objects.equals(this.typicalP, that.typicalP) - && Objects.equals(this.repeatLastN, that.repeatLastN) + && Objects.equals(this.thinkOption, that.thinkOption) && Objects.equals(this.useNUMA, that.useNUMA) + && Objects.equals(this.numCtx, that.numCtx) && Objects.equals(this.numBatch, that.numBatch) + && Objects.equals(this.numGPU, that.numGPU) && Objects.equals(this.mainGPU, that.mainGPU) + && Objects.equals(this.lowVRAM, that.lowVRAM) && Objects.equals(this.f16KV, that.f16KV) + && Objects.equals(this.logitsAll, that.logitsAll) && Objects.equals(this.vocabOnly, that.vocabOnly) + && Objects.equals(this.useMMap, that.useMMap) && Objects.equals(this.useMLock, that.useMLock) + && Objects.equals(this.numThread, that.numThread) && Objects.equals(this.numKeep, that.numKeep) + && Objects.equals(this.seed, that.seed) && Objects.equals(this.numPredict, that.numPredict) + && Objects.equals(this.topK, that.topK) && Objects.equals(this.topP, that.topP) + && Objects.equals(this.minP, that.minP) && Objects.equals(this.tfsZ, that.tfsZ) + && Objects.equals(this.typicalP, that.typicalP) && Objects.equals(this.repeatLastN, that.repeatLastN) && Objects.equals(this.temperature, that.temperature) && Objects.equals(this.repeatPenalty, that.repeatPenalty) && Objects.equals(this.presencePenalty, that.presencePenalty) @@ -849,13 +884,13 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.useNUMA, this.numCtx, - this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll, this.vocabOnly, - this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict, this.topK, - this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty, - 
this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau, this.mirostatEta, - this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, - this.toolContext); + return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.thinkOption, this.useNUMA, + this.numCtx, this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll, + this.vocabOnly, this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict, + this.topK, this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature, + this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau, + this.mirostatEta, this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames, + this.internalToolExecutionEnabled, this.toolContext); } public static final class Builder { @@ -1037,6 +1072,78 @@ public Builder stop(List stop) { return this; } + /** + * Enable thinking mode for the model. The model will include its reasoning + * process in the response's thinking field. + *

+ * Supported by models: Qwen 3, DeepSeek-v3.1, DeepSeek R1 + * @return this builder + * @see #disableThinking() + * @see #thinkLow() + */ + public Builder enableThinking() { + this.options.thinkOption = ThinkOption.ThinkBoolean.ENABLED; + return this; + } + + /** + * Disable thinking mode for the model. + * @return this builder + * @see #enableThinking() + */ + public Builder disableThinking() { + this.options.thinkOption = ThinkOption.ThinkBoolean.DISABLED; + return this; + } + + /** + * Set thinking level to "low" (for GPT-OSS model). + *

+ * GPT-OSS requires one of: low, medium, high. Boolean enable/disable is not + * supported for this model. + * @return this builder + * @see #thinkMedium() + * @see #thinkHigh() + */ + public Builder thinkLow() { + this.options.thinkOption = ThinkOption.ThinkLevel.LOW; + return this; + } + + /** + * Set thinking level to "medium" (for GPT-OSS model). + * @return this builder + * @see #thinkLow() + * @see #thinkHigh() + */ + public Builder thinkMedium() { + this.options.thinkOption = ThinkOption.ThinkLevel.MEDIUM; + return this; + } + + /** + * Set thinking level to "high" (for GPT-OSS model). + * @return this builder + * @see #thinkLow() + * @see #thinkMedium() + */ + public Builder thinkHigh() { + this.options.thinkOption = ThinkOption.ThinkLevel.HIGH; + return this; + } + + /** + * Set the think option explicitly. Use {@link #enableThinking()}, + * {@link #disableThinking()}, {@link #thinkLow()}, {@link #thinkMedium()}, or + * {@link #thinkHigh()} for more convenient alternatives. + * @param thinkOption the think option + * @return this builder + */ + public Builder thinkOption(ThinkOption thinkOption) { + this.options.thinkOption = thinkOption; + return this; + } + public Builder toolCallbacks(List toolCallbacks) { this.options.setToolCallbacks(toolCallbacks); return this; diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java index 4679b6e2539..dd27f5c6985 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java @@ -23,6 +23,7 @@ * * @author Siarhei Blashuk * @author Thomas Vitale + * @author Sun Yuhan * @since 1.0.0 */ public enum OllamaModel implements ChatModelDescription { @@ -51,6 +52,23 @@ public enum OllamaModel implements ChatModelDescription { */ QWEN3_4B("qwen3:4b"), + /** + * 
Qwen3 4B with thinking support. This variant auto-enables thinking by default in + * Ollama 0.12+, providing separate reasoning traces in the response. + * @see OllamaChatOptions#thinkOption + */ + QWEN3_4B_THINKING("qwen3:4b-thinking"), + + /** + * Qwen3 1.7b + */ + QWEN_3_1_7_B("qwen3:1.7b"), + + /** + * Qwen3 0.6b + */ + QWEN_3_06B("qwen3:0.6b"), + /** * QwQ is the reasoning model of the Qwen series. */ diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java index 84c0752654b..5292e63d8a2 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java @@ -44,6 +44,7 @@ * @author Christian Tzolov * @author Thomas Vitale * @author Ilayaperumal Gopinathan + * @author Sun Yuhan * @since 0.8.0 * @see Ollama @@ -55,7 +56,8 @@ @Deprecated public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions { - private static final List NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate"); + private static final List NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate", + "think"); // Following fields are options which must be set when the model is loaded into // memory. diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/ThinkOption.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/ThinkOption.java new file mode 100644 index 00000000000..79053c2adfb --- /dev/null +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/ThinkOption.java @@ -0,0 +1,159 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.ollama.api; + +import java.io.IOException; +import java.util.List; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + +/** + * Represents the thinking option for Ollama models. The think option controls whether + * models emit their reasoning trace before the final answer. + *

+ * Most models (Qwen 3, DeepSeek-v3.1, DeepSeek R1) accept boolean enable/disable. The + * GPT-OSS model requires string levels: "low", "medium", or "high". + * + * @author Mark Pollack + * @since 1.1.0 + * @see ThinkBoolean + * @see ThinkLevel + */ +@JsonSerialize(using = ThinkOption.ThinkOptionSerializer.class) +@JsonDeserialize(using = ThinkOption.ThinkOptionDeserializer.class) +public sealed interface ThinkOption { + + /** + * Converts this think option to its JSON representation. + * @return the JSON value (Boolean or String) + */ + Object toJsonValue(); + + /** + * Serializer that writes ThinkOption as raw boolean or string values. + */ + class ThinkOptionSerializer extends JsonSerializer { + + @Override + public void serialize(ThinkOption value, JsonGenerator gen, SerializerProvider serializers) throws IOException { + if (value == null) { + gen.writeNull(); + } + else { + gen.writeObject(value.toJsonValue()); + } + } + + } + + /** + * Deserializer that reads boolean or string values into ThinkOption instances. + */ + class ThinkOptionDeserializer extends JsonDeserializer { + + @Override + public ThinkOption deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + JsonToken token = p.currentToken(); + if (token == JsonToken.VALUE_TRUE) { + return ThinkBoolean.ENABLED; + } + else if (token == JsonToken.VALUE_FALSE) { + return ThinkBoolean.DISABLED; + } + else if (token == JsonToken.VALUE_STRING) { + return new ThinkLevel(p.getValueAsString()); + } + else if (token == JsonToken.VALUE_NULL) { + return null; + } + throw new IOException("Cannot deserialize ThinkOption from token: " + token); + } + + } + + /** + * Boolean-style think option for models that support simple enable/disable. Supported + * by Qwen 3, DeepSeek-v3.1, and DeepSeek R1 models. + * + * @param enabled whether thinking is enabled + */ + record ThinkBoolean(boolean enabled) implements ThinkOption { + + /** + * Constant for enabled thinking. 
+ */ + public static final ThinkBoolean ENABLED = new ThinkBoolean(true); + + /** + * Constant for disabled thinking. + */ + public static final ThinkBoolean DISABLED = new ThinkBoolean(false); + + @Override + public Object toJsonValue() { + return this.enabled; + } + + } + + /** + * String-level think option for the GPT-OSS model which requires explicit levels. + * + * @param level the thinking level: "low", "medium", or "high" + */ + record ThinkLevel(String level) implements ThinkOption { + + /** + * Low thinking level for GPT-OSS. + */ + public static final ThinkLevel LOW = new ThinkLevel("low"); + + /** + * Medium thinking level for GPT-OSS. + */ + public static final ThinkLevel MEDIUM = new ThinkLevel("medium"); + + /** + * High thinking level for GPT-OSS. + */ + public static final ThinkLevel HIGH = new ThinkLevel("high"); + + /** + * Creates a new ThinkLevel with validation. + */ + public ThinkLevel { + if (level != null && !List.of("low", "medium", "high").contains(level)) { + throw new IllegalArgumentException("think level must be 'low', 'medium', or 'high', got: " + level); + } + } + + @Override + public Object toJsonValue() { + return this.level; + } + + } + +} diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java new file mode 100644 index 00000000000..0a168ae341c --- /dev/null +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java @@ -0,0 +1,113 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.ollama; + +import io.micrometer.observation.tck.TestObservationRegistry; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.chat.metadata.ChatGenerationMetadata; +import org.springframework.ai.chat.model.ChatResponse; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.ollama.api.OllamaApi; +import org.springframework.ai.ollama.api.OllamaChatOptions; +import org.springframework.ai.ollama.api.OllamaModel; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Bean; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit Tests for {@link OllamaChatModel} asserting AI metadata. 
+ * + * @author Sun Yuhan + */ +@SpringBootTest(classes = OllamaChatModelMetadataTests.Config.class) +class OllamaChatModelMetadataTests extends BaseOllamaIT { + + private static final String MODEL = OllamaModel.QWEN_3_06B.getName(); + + @Autowired + TestObservationRegistry observationRegistry; + + @Autowired + OllamaChatModel chatModel; + + @BeforeEach + void beforeEach() { + this.observationRegistry.clear(); + } + + @Test + void ollamaThinkingMetadataCaptured() { + var options = OllamaChatOptions.builder().model(MODEL).enableThinking().build(); + + Prompt prompt = new Prompt("Why is the sky blue?", options); + + ChatResponse chatResponse = this.chatModel.call(prompt); + assertThat(chatResponse.getResult().getOutput().getText()).isNotEmpty(); + + chatResponse.getResults().forEach(generation -> { + ChatGenerationMetadata chatGenerationMetadata = generation.getMetadata(); + assertThat(chatGenerationMetadata).isNotNull(); + assertThat(chatGenerationMetadata.containsKey("thinking")); + }); + } + + @Test + void ollamaThinkingMetadataNotCapturedWhenSetThinkFlagToFalse() { + // Note: Thinking-capable models (e.g., qwen3:*) auto-enable thinking by default + // in Ollama 0.12+. + // This test explicitly disables thinking to verify null metadata is returned. 
+ var options = OllamaChatOptions.builder().model(MODEL).disableThinking().build(); + + Prompt prompt = new Prompt("Why is the sky blue?", options); + + ChatResponse chatResponse = this.chatModel.call(prompt); + assertThat(chatResponse.getResult().getOutput().getText()).isNotEmpty(); + + chatResponse.getResults().forEach(generation -> { + ChatGenerationMetadata chatGenerationMetadata = generation.getMetadata(); + assertThat(chatGenerationMetadata).isNotNull(); + var thinking = chatGenerationMetadata.get("thinking"); + assertThat(thinking).isNull(); + }); + } + + @SpringBootConfiguration + static class Config { + + @Bean + public TestObservationRegistry observationRegistry() { + return TestObservationRegistry.create(); + } + + @Bean + public OllamaApi ollamaApi() { + return initializeOllama(MODEL); + } + + @Bean + public OllamaChatModel openAiChatModel(OllamaApi ollamaApi, TestObservationRegistry observationRegistry) { + return OllamaChatModel.builder().ollamaApi(ollamaApi).observationRegistry(observationRegistry).build(); + } + + } + +} diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java index e027789ff5a..3301f2d9036 100644 --- a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java @@ -23,7 +23,7 @@ */ public final class OllamaImage { - public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.10.1"); + public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.12.10"); private OllamaImage() { diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/OllamaApiIT.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/OllamaApiIT.java index b31ba5365f8..89c5bbac9ee 100644 --- 
a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/OllamaApiIT.java +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/OllamaApiIT.java @@ -33,10 +33,12 @@ import org.springframework.ai.ollama.api.OllamaApi.Message.Role; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertNull; /** * @author Christian Tzolov * @author Thomas Vitale + * @author Sun Yuhan */ public class OllamaApiIT extends BaseOllamaIT { @@ -44,7 +46,7 @@ public class OllamaApiIT extends BaseOllamaIT { private static final String EMBEDDING_MODEL = OllamaModel.NOMIC_EMBED_TEXT.getName(); - private static final String THINKING_MODEL = OllamaModel.QWEN3_4B.getName(); + private static final String THINKING_MODEL = OllamaModel.QWEN3_4B_THINKING.getName(); @BeforeAll public static void beforeAll() throws IOException, InterruptedException { @@ -130,8 +132,7 @@ public void think() { .content("What is the capital of Bulgaria and what is the size? " + "What it the national anthem?") .build())) - .options(OllamaOptions.builder().temperature(0.9).build()) - .think(true) + .options(OllamaChatOptions.builder().temperature(0.9).enableThinking().build()) .build(); ChatResponse response = getOllamaApi().chat(request); @@ -146,4 +147,85 @@ public void think() { assertThat(response.message().thinking()).isNotEmpty(); } + @Test + public void chatWithThinking() { + var request = ChatRequest.builder(THINKING_MODEL) + .stream(true) + .messages(List.of(Message.builder(Role.USER) + .content("What is the capital of Bulgaria and what is the size? 
" + "What it the national anthem?") + .build())) + .options(OllamaChatOptions.builder().temperature(0.9).enableThinking().build()) + .build(); + + Flux response = getOllamaApi().streamingChat(request); + + List responses = response.collectList().block(); + System.out.println(responses); + + assertThat(responses).isNotNull(); + assertThat(responses.stream() + .filter(r -> r.message() != null) + .map(r -> r.message().thinking()) + .collect(Collectors.joining(System.lineSeparator()))).contains("Sofia"); + + ChatResponse lastResponse = responses.get(responses.size() - 1); + assertThat(lastResponse.message().content()).isEmpty(); + assertNull(lastResponse.message().thinking()); + assertThat(lastResponse.done()).isTrue(); + } + + @Test + public void streamChatWithThinking() { + var request = ChatRequest.builder(THINKING_MODEL) + .stream(true) + .messages(List.of(Message.builder(Role.USER).content("What are the planets in the solar system?").build())) + .options(OllamaChatOptions.builder().temperature(0.9).enableThinking().build()) + .build(); + + Flux response = getOllamaApi().streamingChat(request); + + List responses = response.collectList().block(); + System.out.println(responses); + + assertThat(responses).isNotNull(); + assertThat(responses.stream() + .filter(r -> r.message() != null) + .map(r -> r.message().thinking()) + .collect(Collectors.joining(System.lineSeparator()))).contains("solar"); + + ChatResponse lastResponse = responses.get(responses.size() - 1); + assertThat(lastResponse.message().content()).isEmpty(); + assertNull(lastResponse.message().thinking()); + assertThat(lastResponse.done()).isTrue(); + } + + @Test + public void streamChatWithoutThinking() { + var request = ChatRequest.builder(THINKING_MODEL) + .stream(true) + .messages(List.of(Message.builder(Role.USER).content("What are the planets in the solar system?").build())) + .options(OllamaChatOptions.builder().temperature(0.9).disableThinking().build()) + .build(); + + Flux response = 
getOllamaApi().streamingChat(request); + + List responses = response.collectList().block(); + System.out.println(responses); + + assertThat(responses).isNotNull(); + + assertThat(responses.stream() + .filter(r -> r.message() != null) + .map(r -> r.message().content()) + .collect(Collectors.joining(System.lineSeparator()))).contains("Earth"); + + assertThat(responses.stream().filter(r -> r.message() != null).allMatch(r -> r.message().thinking() == null)) + .isTrue(); + + ChatResponse lastResponse = responses.get(responses.size() - 1); + assertThat(lastResponse.message().content()).isEmpty(); + assertNull(lastResponse.message().thinking()); + assertThat(lastResponse.done()).isTrue(); + } + } diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/ThinkOptionTests.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/ThinkOptionTests.java new file mode 100644 index 00000000000..f6ce9c51312 --- /dev/null +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/ThinkOptionTests.java @@ -0,0 +1,146 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.ollama.api; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link ThinkOption} serialization and deserialization. + * + * @author Mark Pollack + */ +class ThinkOptionTests { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + @Test + void testThinkBooleanEnabledSerialization() throws Exception { + ThinkOption option = ThinkOption.ThinkBoolean.ENABLED; + String json = this.objectMapper.writeValueAsString(option); + assertThat(json).isEqualTo("true"); + } + + @Test + void testThinkBooleanDisabledSerialization() throws Exception { + ThinkOption option = ThinkOption.ThinkBoolean.DISABLED; + String json = this.objectMapper.writeValueAsString(option); + assertThat(json).isEqualTo("false"); + } + + @Test + void testThinkLevelLowSerialization() throws Exception { + ThinkOption option = ThinkOption.ThinkLevel.LOW; + String json = this.objectMapper.writeValueAsString(option); + assertThat(json).isEqualTo("\"low\""); + } + + @Test + void testThinkLevelMediumSerialization() throws Exception { + ThinkOption option = ThinkOption.ThinkLevel.MEDIUM; + String json = this.objectMapper.writeValueAsString(option); + assertThat(json).isEqualTo("\"medium\""); + } + + @Test + void testThinkLevelHighSerialization() throws Exception { + ThinkOption option = ThinkOption.ThinkLevel.HIGH; + String json = this.objectMapper.writeValueAsString(option); + assertThat(json).isEqualTo("\"high\""); + } + + @Test + void testDeserializeBooleanTrue() throws Exception { + String json = "true"; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isEqualTo(ThinkOption.ThinkBoolean.ENABLED); + assertThat(option).isInstanceOf(ThinkOption.ThinkBoolean.class); + assertThat(((ThinkOption.ThinkBoolean) 
option).enabled()).isTrue(); + } + + @Test + void testDeserializeBooleanFalse() throws Exception { + String json = "false"; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isEqualTo(ThinkOption.ThinkBoolean.DISABLED); + assertThat(option).isInstanceOf(ThinkOption.ThinkBoolean.class); + assertThat(((ThinkOption.ThinkBoolean) option).enabled()).isFalse(); + } + + @Test + void testDeserializeStringLow() throws Exception { + String json = "\"low\""; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isInstanceOf(ThinkOption.ThinkLevel.class); + assertThat(((ThinkOption.ThinkLevel) option).level()).isEqualTo("low"); + } + + @Test + void testDeserializeStringMedium() throws Exception { + String json = "\"medium\""; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isInstanceOf(ThinkOption.ThinkLevel.class); + assertThat(((ThinkOption.ThinkLevel) option).level()).isEqualTo("medium"); + } + + @Test + void testDeserializeStringHigh() throws Exception { + String json = "\"high\""; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isInstanceOf(ThinkOption.ThinkLevel.class); + assertThat(((ThinkOption.ThinkLevel) option).level()).isEqualTo("high"); + } + + @Test + void testDeserializeNull() throws Exception { + String json = "null"; + ThinkOption option = this.objectMapper.readValue(json, ThinkOption.class); + assertThat(option).isNull(); + } + + @Test + void testThinkLevelInvalidStringThrowsException() { + assertThatThrownBy(() -> new ThinkOption.ThinkLevel("invalid")).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("think level must be 'low', 'medium', or 'high'"); + } + + @Test + void testThinkLevelConstants() { + assertThat(ThinkOption.ThinkLevel.LOW.level()).isEqualTo("low"); + assertThat(ThinkOption.ThinkLevel.MEDIUM.level()).isEqualTo("medium"); + 
assertThat(ThinkOption.ThinkLevel.HIGH.level()).isEqualTo("high"); + } + + @Test + void testThinkBooleanConstants() { + assertThat(ThinkOption.ThinkBoolean.ENABLED.enabled()).isTrue(); + assertThat(ThinkOption.ThinkBoolean.DISABLED.enabled()).isFalse(); + } + + @Test + void testToJsonValue() { + assertThat(ThinkOption.ThinkBoolean.ENABLED.toJsonValue()).isEqualTo(true); + assertThat(ThinkOption.ThinkBoolean.DISABLED.toJsonValue()).isEqualTo(false); + assertThat(ThinkOption.ThinkLevel.LOW.toJsonValue()).isEqualTo("low"); + assertThat(ThinkOption.ThinkLevel.MEDIUM.toJsonValue()).isEqualTo("medium"); + assertThat(ThinkOption.ThinkLevel.HIGH.toJsonValue()).isEqualTo("high"); + } + +} diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc index fae113af652..2b78a3b6840 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc @@ -159,7 +159,9 @@ TIP: All properties prefixed with `spring.ai.ollama.chat.options` can be overrid == Runtime Options [[chat-options]] -The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java[OllamaChatOptions.java] class provides model configurations, such as the model to use, the temperature, etc. +The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java[OllamaChatOptions.java] class provides model configurations, such as the model to use, the temperature, thinking mode, etc. + +IMPORTANT: The `OllamaOptions` class has been deprecated. Use `OllamaChatOptions` for chat models and `OllamaEmbeddingOptions` for embedding models instead. The new classes provide type-safe, model-specific configuration options. 
On start-up, the default options can be configured with the `OllamaChatModel(api, options)` constructor or the `spring.ai.ollama.chat.options.*` properties. @@ -250,6 +252,141 @@ Read more about xref:api/tools.adoc[Tool Calling]. TIP: You need Ollama 0.2.8 or newer to use the functional calling capabilities and Ollama 0.4.6 or newer to use them in streaming mode. +== Thinking Mode (Reasoning) + +Ollama supports thinking mode for reasoning models that can emit their internal reasoning process before providing a final answer. This feature is available for models like Qwen3, DeepSeek-v3.1, DeepSeek R1, and GPT-OSS. + +TIP: Thinking mode helps you understand the model's reasoning process and can improve response quality for complex problems. + +IMPORTANT: *Default Behavior (Ollama 0.12+)*: Thinking-capable models (such as `qwen3:*-thinking`, `deepseek-r1`, `deepseek-v3.1`) *auto-enable thinking by default* when the think option is not explicitly set. Standard models (such as `qwen2.5:*`, `llama3.2`) do not enable thinking by default. To explicitly control this behavior, use `.enableThinking()` or `.disableThinking()`. 
+ +=== Enabling Thinking Mode + +Most models (Qwen3, DeepSeek-v3.1, DeepSeek R1) support simple boolean enable/disable: + +[source,java] +---- +ChatResponse response = chatModel.call( + new Prompt( + "How many letter 'r' are in the word 'strawberry'?", + OllamaChatOptions.builder() + .model("qwen3") + .enableThinking() + .build() + )); + +// Access the thinking process +String thinking = response.getResult().getMetadata().get("thinking"); +String answer = response.getResult().getOutput().getText(); +---- + +You can also disable thinking explicitly: + +[source,java] +---- +ChatResponse response = chatModel.call( + new Prompt( + "What is 2+2?", + OllamaChatOptions.builder() + .model("deepseek-r1") + .disableThinking() + .build() + )); +---- + +=== Thinking Levels (GPT-OSS Only) + +The GPT-OSS model requires explicit thinking levels instead of boolean values: + +[source,java] +---- +// Low thinking level +ChatResponse response = chatModel.call( + new Prompt( + "Generate a short headline", + OllamaChatOptions.builder() + .model("gpt-oss") + .thinkLow() + .build() + )); + +// Medium thinking level +ChatResponse response = chatModel.call( + new Prompt( + "Analyze this dataset", + OllamaChatOptions.builder() + .model("gpt-oss") + .thinkMedium() + .build() + )); + +// High thinking level +ChatResponse response = chatModel.call( + new Prompt( + "Solve this complex problem", + OllamaChatOptions.builder() + .model("gpt-oss") + .thinkHigh() + .build() + )); +---- + +=== Accessing Thinking Content + +The thinking content is available in the response metadata: + +[source,java] +---- +ChatResponse response = chatModel.call( + new Prompt( + "Calculate 17 × 23", + OllamaChatOptions.builder() + .model("deepseek-r1") + .enableThinking() + .build() + )); + +// Get the reasoning process +String thinking = response.getResult().getMetadata().get("thinking"); +System.out.println("Reasoning: " + thinking); +// Output: "17 × 20 = 340, 17 × 3 = 51, 340 + 51 = 391" + +// Get the final
answer +String answer = response.getResult().getOutput().getText(); +System.out.println("Answer: " + answer); +// Output: "The answer is 391" +---- + +=== Streaming with Thinking + +Thinking mode works with streaming responses as well: + +[source,java] +---- +Flux<ChatResponse> stream = chatModel.stream( + new Prompt( + "Explain quantum entanglement", + OllamaChatOptions.builder() + .model("qwen3") + .enableThinking() + .build() + )); + +stream.subscribe(response -> { + String thinking = response.getResult().getMetadata().get("thinking"); + String content = response.getResult().getOutput().getText(); + + if (thinking != null && !thinking.isEmpty()) { + System.out.println("[Thinking] " + thinking); + } + if (content != null && !content.isEmpty()) { + System.out.println("[Response] " + content); + } +}); +---- + +NOTE: When thinking is disabled or not set, the `thinking` metadata field will be null or empty. + == Multimodal Multimodality refers to a model's ability to simultaneously understand and process information from various sources, including text, images, audio, and other data formats. diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc index 430c55ad540..d26ed9fb59e 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc @@ -164,6 +164,8 @@ TIP: All properties prefixed with `spring.ai.ollama.embedding.options` can be ov The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaEmbeddingOptions.java[OllamaEmbeddingOptions.java] provides the Ollama configurations, such as the model to use, the low level GPU and CPU tuning, etc. +IMPORTANT: The `OllamaOptions` class has been deprecated.
Use `OllamaChatOptions` for chat models and `OllamaEmbeddingOptions` for embedding models instead. The new classes provide type-safe, model-specific configuration options. + The default options can be configured using the `spring.ai.ollama.embedding.options` properties as well. At start-time use the `OllamaEmbeddingModel(OllamaApi ollamaApi, OllamaEmbeddingOptions defaultOptions)` to configure the default options used for all embedding requests.