Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
if (ollamaResponse.promptEvalCount() != null && ollamaResponse.evalCount() != null) {
generationMetadata = ChatGenerationMetadata.builder()
.finishReason(ollamaResponse.doneReason())
.metadata("thinking", ollamaResponse.message().thinking())
.build();
}

Expand Down Expand Up @@ -505,7 +506,8 @@ else if (message.getMessageType() == MessageType.TOOL) {
OllamaApi.ChatRequest.Builder requestBuilder = OllamaApi.ChatRequest.builder(requestOptions.getModel())
.stream(stream)
.messages(ollamaMessages)
.options(requestOptions);
.options(requestOptions)
.think(requestOptions.getThinkOption());

if (requestOptions.getFormat() != null) {
requestBuilder.format(requestOptions.getFormat());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ public record ChatRequest(
@JsonProperty("keep_alive") String keepAlive,
@JsonProperty("tools") List<Tool> tools,
@JsonProperty("options") Map<String, Object> options,
@JsonProperty("think") Boolean think
@JsonProperty("think") ThinkOption think
) {

public static Builder builder(String model) {
Expand Down Expand Up @@ -475,7 +475,7 @@ public static final class Builder {
private String keepAlive;
private List<Tool> tools = List.of();
private Map<String, Object> options = Map.of();
private Boolean think;
private ThinkOption think;

public Builder(String model) {
Assert.notNull(model, "The model can not be null.");
Expand Down Expand Up @@ -514,11 +514,56 @@ public Builder options(Map<String, Object> options) {
return this;
}

public Builder think(Boolean think) {
/**
 * Set the think option for this chat request.
 * <p>
 * A {@code ThinkOption} is either a boolean enable/disable value
 * ({@code ThinkOption.ThinkBoolean}) or a level value
 * ({@code ThinkOption.ThinkLevel} — "low"/"medium"/"high", used by GPT-OSS).
 * The convenience methods {@link #enableThinking()}, {@link #disableThinking()},
 * {@link #thinkLow()}, {@link #thinkMedium()} and {@link #thinkHigh()} set
 * this same field.
 * @param think the think option to send as the request's "think" field
 * @return this builder
 */
public Builder think(ThinkOption think) {
this.think = think;
return this;
}

/**
 * Turn thinking mode on for the model (boolean form).
 * @return this builder
 */
public Builder enableThinking() {
    return think(ThinkOption.ThinkBoolean.ENABLED);
}

/**
 * Turn thinking mode off for the model (boolean form).
 * @return this builder
 */
public Builder disableThinking() {
    return think(ThinkOption.ThinkBoolean.DISABLED);
}

/**
 * Request the "low" thinking level (level form, used by the GPT-OSS model).
 * @return this builder
 */
public Builder thinkLow() {
    return think(ThinkOption.ThinkLevel.LOW);
}

/**
 * Request the "medium" thinking level (level form, used by the GPT-OSS model).
 * @return this builder
 */
public Builder thinkMedium() {
    return think(ThinkOption.ThinkLevel.MEDIUM);
}

/**
 * Request the "high" thinking level (level form, used by the GPT-OSS model).
 * @return this builder
 */
public Builder thinkHigh() {
    return think(ThinkOption.ThinkLevel.HIGH);
}

@Deprecated
public Builder options(OllamaOptions options) {
Objects.requireNonNull(options, "The options can not be null.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
@JsonInclude(Include.NON_NULL)
public class OllamaChatOptions implements ToolCallingChatOptions {

private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate");
private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate",
"think");

// Following fields are options which must be set when the model is loaded into
// memory.
Expand Down Expand Up @@ -317,6 +318,31 @@ public class OllamaChatOptions implements ToolCallingChatOptions {
@JsonProperty("truncate")
private Boolean truncate;

/**
* The model should think before responding, if supported.
* <p>
* Most models (Qwen 3, DeepSeek-v3.1, DeepSeek R1) use boolean enable/disable.
* The GPT-OSS model requires string levels: "low", "medium", or "high".
* <p>
* <strong>Default Behavior (Ollama 0.12+):</strong>
* <ul>
* <li>Thinking-capable models (e.g., qwen3:*-thinking, deepseek-r1, deepseek-v3.1)
* <strong>auto-enable thinking by default</strong> when this field is not set.</li>
* <li>Standard models (e.g., qwen2.5:*, llama3.2) do not enable thinking by default.</li>
* <li>To explicitly control behavior, use {@link Builder#enableThinking()} or
* {@link Builder#disableThinking()}.</li>
* </ul>
* <p>
* Use {@link Builder#enableThinking()}, {@link Builder#disableThinking()}, or
* {@link Builder#thinkHigh()} to configure this option.
*
* @see ThinkOption
* @see ThinkBoolean
* @see ThinkLevel
*/
@JsonProperty("think")
private ThinkOption thinkOption;

@JsonIgnore
private Boolean internalToolExecutionEnabled;

Expand Down Expand Up @@ -364,6 +390,7 @@ public static OllamaChatOptions fromOptions(OllamaChatOptions fromOptions) {
.format(fromOptions.getFormat())
.keepAlive(fromOptions.getKeepAlive())
.truncate(fromOptions.getTruncate())
.thinkOption(fromOptions.getThinkOption())
.useNUMA(fromOptions.getUseNUMA())
.numCtx(fromOptions.getNumCtx())
.numBatch(fromOptions.getNumBatch())
Expand Down Expand Up @@ -745,6 +772,14 @@ public void setTruncate(Boolean truncate) {
this.truncate = truncate;
}

/**
 * Returns the configured think option, or {@code null} when unset
 * (unset lets the model apply its own default thinking behavior).
 */
public ThinkOption getThinkOption() {
return this.thinkOption;
}

/**
 * Sets the think option. Prefer the builder's {@code enableThinking()},
 * {@code disableThinking()} or {@code thinkLow()/thinkMedium()/thinkHigh()}
 * convenience methods when constructing options.
 * @param thinkOption the think option; {@code null} clears it
 */
public void setThinkOption(ThinkOption thinkOption) {
this.thinkOption = thinkOption;
}

@Override
@JsonIgnore
public List<ToolCallback> getToolCallbacks() {
Expand Down Expand Up @@ -824,17 +859,17 @@ public boolean equals(Object o) {
OllamaChatOptions that = (OllamaChatOptions) o;
return Objects.equals(this.model, that.model) && Objects.equals(this.format, that.format)
&& Objects.equals(this.keepAlive, that.keepAlive) && Objects.equals(this.truncate, that.truncate)
&& Objects.equals(this.useNUMA, that.useNUMA) && Objects.equals(this.numCtx, that.numCtx)
&& Objects.equals(this.numBatch, that.numBatch) && Objects.equals(this.numGPU, that.numGPU)
&& Objects.equals(this.mainGPU, that.mainGPU) && Objects.equals(this.lowVRAM, that.lowVRAM)
&& Objects.equals(this.f16KV, that.f16KV) && Objects.equals(this.logitsAll, that.logitsAll)
&& Objects.equals(this.vocabOnly, that.vocabOnly) && Objects.equals(this.useMMap, that.useMMap)
&& Objects.equals(this.useMLock, that.useMLock) && Objects.equals(this.numThread, that.numThread)
&& Objects.equals(this.numKeep, that.numKeep) && Objects.equals(this.seed, that.seed)
&& Objects.equals(this.numPredict, that.numPredict) && Objects.equals(this.topK, that.topK)
&& Objects.equals(this.topP, that.topP) && Objects.equals(this.minP, that.minP)
&& Objects.equals(this.tfsZ, that.tfsZ) && Objects.equals(this.typicalP, that.typicalP)
&& Objects.equals(this.repeatLastN, that.repeatLastN)
&& Objects.equals(this.thinkOption, that.thinkOption) && Objects.equals(this.useNUMA, that.useNUMA)
&& Objects.equals(this.numCtx, that.numCtx) && Objects.equals(this.numBatch, that.numBatch)
&& Objects.equals(this.numGPU, that.numGPU) && Objects.equals(this.mainGPU, that.mainGPU)
&& Objects.equals(this.lowVRAM, that.lowVRAM) && Objects.equals(this.f16KV, that.f16KV)
&& Objects.equals(this.logitsAll, that.logitsAll) && Objects.equals(this.vocabOnly, that.vocabOnly)
&& Objects.equals(this.useMMap, that.useMMap) && Objects.equals(this.useMLock, that.useMLock)
&& Objects.equals(this.numThread, that.numThread) && Objects.equals(this.numKeep, that.numKeep)
&& Objects.equals(this.seed, that.seed) && Objects.equals(this.numPredict, that.numPredict)
&& Objects.equals(this.topK, that.topK) && Objects.equals(this.topP, that.topP)
&& Objects.equals(this.minP, that.minP) && Objects.equals(this.tfsZ, that.tfsZ)
&& Objects.equals(this.typicalP, that.typicalP) && Objects.equals(this.repeatLastN, that.repeatLastN)
&& Objects.equals(this.temperature, that.temperature)
&& Objects.equals(this.repeatPenalty, that.repeatPenalty)
&& Objects.equals(this.presencePenalty, that.presencePenalty)
Expand All @@ -849,13 +884,13 @@ public boolean equals(Object o) {

/**
 * Hash over every option field, in exactly the same order as the field
 * comparisons in {@code equals(Object)}, so equal instances always produce
 * equal hashes. {@code thinkOption} is included immediately after
 * {@code truncate}, mirroring its position in equals().
 */
@Override
public int hashCode() {
    return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.thinkOption, this.useNUMA,
            this.numCtx, this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll,
            this.vocabOnly, this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict,
            this.topK, this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature,
            this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau,
            this.mirostatEta, this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames,
            this.internalToolExecutionEnabled, this.toolContext);
}

public static final class Builder {
Expand Down Expand Up @@ -1037,6 +1072,78 @@ public Builder stop(List<String> stop) {
return this;
}

/**
 * Enable thinking mode: the model reports its reasoning process in the
 * response's thinking field.
 * <p>
 * Supported by models such as Qwen 3, DeepSeek-v3.1 and DeepSeek R1.
 * @return this builder
 * @see #disableThinking()
 * @see #thinkLow()
 */
public Builder enableThinking() {
    return thinkOption(ThinkOption.ThinkBoolean.ENABLED);
}

/**
 * Disable thinking mode for the model.
 * @return this builder
 * @see #enableThinking()
 */
public Builder disableThinking() {
    return thinkOption(ThinkOption.ThinkBoolean.DISABLED);
}

/**
 * Use the "low" thinking level (GPT-OSS model).
 * <p>
 * GPT-OSS accepts only the levels low, medium and high; the boolean
 * enable/disable form is not supported for that model.
 * @return this builder
 * @see #thinkMedium()
 * @see #thinkHigh()
 */
public Builder thinkLow() {
    return thinkOption(ThinkOption.ThinkLevel.LOW);
}

/**
 * Use the "medium" thinking level (GPT-OSS model).
 * @return this builder
 * @see #thinkLow()
 * @see #thinkHigh()
 */
public Builder thinkMedium() {
    return thinkOption(ThinkOption.ThinkLevel.MEDIUM);
}

/**
 * Use the "high" thinking level (GPT-OSS model).
 * @return this builder
 * @see #thinkLow()
 * @see #thinkMedium()
 */
public Builder thinkHigh() {
    return thinkOption(ThinkOption.ThinkLevel.HIGH);
}

/**
 * Set the think option directly. The convenience methods
 * {@link #enableThinking()}, {@link #disableThinking()}, {@link #thinkLow()},
 * {@link #thinkMedium()} and {@link #thinkHigh()} are usually more readable.
 * @param thinkOption the think option to apply
 * @return this builder
 */
public Builder thinkOption(ThinkOption thinkOption) {
    this.options.setThinkOption(thinkOption);
    return this;
}

public Builder toolCallbacks(List<ToolCallback> toolCallbacks) {
this.options.setToolCallbacks(toolCallbacks);
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*
* @author Siarhei Blashuk
* @author Thomas Vitale
* @author Sun Yuhan
* @since 1.0.0
*/
public enum OllamaModel implements ChatModelDescription {
Expand Down Expand Up @@ -51,6 +52,23 @@ public enum OllamaModel implements ChatModelDescription {
*/
QWEN3_4B("qwen3:4b"),

/**
 * Qwen3 4B with thinking support. This variant auto-enables thinking by default in
 * Ollama 0.12+, providing separate reasoning traces in the response.
 * @see OllamaChatOptions#thinkOption
 */
QWEN3_4B_THINKING("qwen3:4b-thinking"),

/**
 * Qwen3 1.7B.
 * <p>
 * NOTE(review): the constant name {@code QWEN_3_1_7_B} does not follow the
 * existing {@code QWEN3_4B} naming pattern ({@code QWEN3_1_7B} would match) —
 * consider aligning before release, since renaming an enum constant later is
 * a breaking change.
 */
QWEN_3_1_7_B("qwen3:1.7b"),

/**
 * Qwen3 0.6B.
 * <p>
 * NOTE(review): {@code QWEN_3_06B} reads ambiguously as "06B";
 * {@code QWEN3_0_6B} would map to the tag "qwen3:0.6b" more clearly and
 * match the naming of the other Qwen3 constants.
 */
QWEN_3_06B("qwen3:0.6b"),

/**
* QwQ is the reasoning model of the Qwen series.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
* @author Christian Tzolov
* @author Thomas Vitale
* @author Ilayaperumal Gopinathan
* @author Sun Yuhan
* @since 0.8.0
* @see <a href=
* "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
Expand All @@ -55,7 +56,8 @@
@Deprecated
public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions {

private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate");
private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate",
"think");

// Following fields are options which must be set when the model is loaded into
// memory.
Expand Down
Loading