From 58ed2e7d54bbd39d8d05ed974ef74ff8b6c03a39 Mon Sep 17 00:00:00 2001 From: liugddx Date: Sun, 19 Oct 2025 18:33:51 +0800 Subject: [PATCH 1/6] feat: add support for removing thinking tags from input text in BeanOutputConverter Signed-off-by: liugddx --- .../ai/converter/BeanOutputConverter.java | 4 ++ .../ai/converter/BeanOutputConverterTest.java | 43 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java index 176780ebe51..b8263c22793 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java @@ -169,6 +169,10 @@ public T convert(@NonNull String text) { // Remove leading and trailing whitespace text = text.trim(); + // Remove thinking tags (e.g., from Amazon Nova models) + // These tags can appear at the beginning of the response + text = text.replaceAll("(?s).*?\\s*", ""); + // Check for and remove triple backticks and "json" identifier if (text.startsWith("```") && text.endsWith("```")) { // Remove the first line if it contains "```json" diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java index 498d0782045..97d43615450 100644 --- a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java @@ -238,6 +238,49 @@ void convertTypeReferenceArrayWithJsonAnnotations() { assertThat(testClass.get(0).getSomeString()).isEqualTo("some value"); } + @Test + void convertWithThinkingTags() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = 
"This is my reasoning process...{ \"someString\": \"some value\" }"; + var testClass = converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } + + @Test + void convertWithThinkingTagsMultiline() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = """ + + This is my reasoning process + spanning multiple lines + + { "someString": "some value" } + """; + var testClass = converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } + + @Test + void convertWithThinkingTagsAndMarkdownCodeBlock() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = """ + This is my reasoning process... + ```json + { "someString": "some value" } + ``` + """; + var testClass = converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } + + @Test + void convertWithMultipleThinkingTags() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = "First thoughtSecond thought{ \"someString\": \"some value\" }"; + var testClass = converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } + } // @checkstyle:off RegexpSinglelineJavaCheck From 548cf8291f4ac5ba0493f39df061be94299ea4ab Mon Sep 17 00:00:00 2001 From: liugddx Date: Sun, 19 Oct 2025 21:09:46 +0800 Subject: [PATCH 2/6] style: apply spring-javaformat to fix formatting violations Signed-off-by: liugddx --- .../ai/converter/BeanOutputConverterTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java index 97d43615450..faeef60700f 100644 --- 
a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java @@ -250,12 +250,12 @@ void convertWithThinkingTags() { void convertWithThinkingTagsMultiline() { var converter = new BeanOutputConverter<>(TestClass.class); String textWithThinkingTags = """ - - This is my reasoning process - spanning multiple lines - - { "someString": "some value" } - """; + + This is my reasoning process + spanning multiple lines + + { "someString": "some value" } + """; var testClass = converter.convert(textWithThinkingTags); assertThat(testClass.getSomeString()).isEqualTo("some value"); } @@ -264,11 +264,11 @@ void convertWithThinkingTagsMultiline() { void convertWithThinkingTagsAndMarkdownCodeBlock() { var converter = new BeanOutputConverter<>(TestClass.class); String textWithThinkingTags = """ - This is my reasoning process... - ```json - { "someString": "some value" } - ``` - """; + This is my reasoning process... 
+ ```json + { "someString": "some value" } + ``` + """; var testClass = converter.convert(textWithThinkingTags); assertThat(testClass.getSomeString()).isEqualTo("some value"); } From d997c80e053be07687bcd2cd626fc894741217eb Mon Sep 17 00:00:00 2001 From: liugddx Date: Mon, 20 Oct 2025 08:17:36 +0800 Subject: [PATCH 3/6] feat: enhance BeanOutputConverter with customizable text cleaning capabilities Signed-off-by: liugddx --- .../ai/converter/BeanOutputConverter.java | 79 +++++--- .../CompositeResponseTextCleaner.java | 109 +++++++++++ .../converter/MarkdownCodeBlockCleaner.java | 74 ++++++++ .../ai/converter/ResponseTextCleaner.java | 38 ++++ .../ai/converter/ThinkingTagCleaner.java | 172 ++++++++++++++++++ .../ai/converter/WhitespaceCleaner.java | 33 ++++ .../ai/converter/BeanOutputConverterTest.java | 96 +++++++++- .../CompositeResponseTextCleanerTest.java | 106 +++++++++++ .../ai/converter/ThinkingTagCleanerTest.java | 155 ++++++++++++++++ 9 files changed, 826 insertions(+), 36 deletions(-) create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java create mode 100644 spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java create mode 100644 spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java index 
b8263c22793..251f5116458 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java @@ -76,12 +76,15 @@ public class BeanOutputConverter implements StructuredOutputConverter { /** Holds the generated JSON schema for the target type. */ private String jsonSchema; + /** The text cleaner used to preprocess LLM responses before parsing. */ + private final ResponseTextCleaner textCleaner; + /** * Constructor to initialize with the target type's class. * @param clazz The target type's class. */ public BeanOutputConverter(Class clazz) { - this(ParameterizedTypeReference.forType(clazz)); + this(clazz, null, null); } /** @@ -91,7 +94,18 @@ public BeanOutputConverter(Class clazz) { * @param objectMapper Custom object mapper for JSON operations. endings. */ public BeanOutputConverter(Class clazz, ObjectMapper objectMapper) { - this(ParameterizedTypeReference.forType(clazz), objectMapper); + this(clazz, objectMapper, null); + } + + /** + * Constructor to initialize with the target type's class, a custom object mapper, and + * a custom text cleaner. + * @param clazz The target type's class. + * @param objectMapper Custom object mapper for JSON operations. + * @param textCleaner Custom text cleaner for preprocessing responses. + */ + public BeanOutputConverter(Class clazz, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) { + this(ParameterizedTypeReference.forType(clazz), objectMapper, textCleaner); } /** @@ -99,7 +113,7 @@ public BeanOutputConverter(Class clazz, ObjectMapper objectMapper) { * @param typeRef The target class type reference. */ public BeanOutputConverter(ParameterizedTypeReference typeRef) { - this(typeRef.getType(), null); + this(typeRef, null, null); } /** @@ -110,7 +124,19 @@ public BeanOutputConverter(ParameterizedTypeReference typeRef) { * @param objectMapper Custom object mapper for JSON operations. endings. 
*/ public BeanOutputConverter(ParameterizedTypeReference typeRef, ObjectMapper objectMapper) { - this(typeRef.getType(), objectMapper); + this(typeRef, objectMapper, null); + } + + /** + * Constructor to initialize with the target class type reference, a custom object + * mapper, and a custom text cleaner. + * @param typeRef The target class type reference. + * @param objectMapper Custom object mapper for JSON operations. + * @param textCleaner Custom text cleaner for preprocessing responses. + */ + public BeanOutputConverter(ParameterizedTypeReference typeRef, ObjectMapper objectMapper, + ResponseTextCleaner textCleaner) { + this(typeRef.getType(), objectMapper, textCleaner); } /** @@ -119,14 +145,30 @@ public BeanOutputConverter(ParameterizedTypeReference typeRef, ObjectMapper o * platform. * @param type The target class type. * @param objectMapper Custom object mapper for JSON operations. endings. + * @param textCleaner Custom text cleaner for preprocessing responses. */ - private BeanOutputConverter(Type type, ObjectMapper objectMapper) { + private BeanOutputConverter(Type type, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) { Objects.requireNonNull(type, "Type cannot be null;"); this.type = type; this.objectMapper = objectMapper != null ? objectMapper : getObjectMapper(); + this.textCleaner = textCleaner != null ? textCleaner : createDefaultTextCleaner(); generateSchema(); } + /** + * Creates the default text cleaner that handles common response formats from various + * AI models. + * @return a composite text cleaner with default cleaning strategies + */ + private static ResponseTextCleaner createDefaultTextCleaner() { + return CompositeResponseTextCleaner.builder() + .addCleaner(new WhitespaceCleaner()) + .addCleaner(new ThinkingTagCleaner()) + .addCleaner(new MarkdownCodeBlockCleaner()) + .addCleaner(new WhitespaceCleaner()) // Final trim after all cleanups + .build(); + } + /** * Generates the JSON schema for the target type. 
*/ @@ -166,30 +208,9 @@ private void generateSchema() { @Override public T convert(@NonNull String text) { try { - // Remove leading and trailing whitespace - text = text.trim(); - - // Remove thinking tags (e.g., from Amazon Nova models) - // These tags can appear at the beginning of the response - text = text.replaceAll("(?s).*?\\s*", ""); - - // Check for and remove triple backticks and "json" identifier - if (text.startsWith("```") && text.endsWith("```")) { - // Remove the first line if it contains "```json" - String[] lines = text.split("\n", 2); - if (lines[0].trim().equalsIgnoreCase("```json")) { - text = lines.length > 1 ? lines[1] : ""; - } - else { - text = text.substring(3); // Remove leading ``` - } - - // Remove trailing ``` - text = text.substring(0, text.length() - 3); - - // Trim again to remove any potential whitespace - text = text.trim(); - } + // Clean the text using the configured text cleaner + text = this.textCleaner.clean(text); + return (T) this.objectMapper.readValue(text, this.objectMapper.constructType(this.type)); } catch (JsonProcessingException e) { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java new file mode 100644 index 00000000000..5b07518190c --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java @@ -0,0 +1,109 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.converter; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.springframework.util.Assert; + +/** + * A composite {@link ResponseTextCleaner} that applies multiple cleaners in sequence. + * This allows for a flexible pipeline of text cleaning operations. + * + * @author liugddx + * @since 1.0.0 + */ +public class CompositeResponseTextCleaner implements ResponseTextCleaner { + + private final List cleaners; + + /** + * Creates a composite cleaner with the given cleaners. + * @param cleaners the list of cleaners to apply in order + */ + public CompositeResponseTextCleaner(List cleaners) { + Assert.notNull(cleaners, "cleaners cannot be null"); + this.cleaners = new ArrayList<>(cleaners); + } + + /** + * Creates a composite cleaner with no cleaners. Text will be returned unchanged. + */ + public CompositeResponseTextCleaner() { + this(new ArrayList<>()); + } + + /** + * Creates a composite cleaner with the given cleaners. + * @param cleaners the cleaners to apply in order + */ + public CompositeResponseTextCleaner(ResponseTextCleaner... cleaners) { + this(Arrays.asList(cleaners)); + } + + @Override + public String clean(String text) { + String result = text; + for (ResponseTextCleaner cleaner : this.cleaners) { + result = cleaner.clean(result); + } + return result; + } + + /** + * Creates a builder for constructing a composite cleaner. 
+ * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for {@link CompositeResponseTextCleaner}. + */ + public static final class Builder { + + private final List cleaners = new ArrayList<>(); + + private Builder() { + } + + /** + * Add a cleaner to the pipeline. + * @param cleaner the cleaner to add + * @return this builder + */ + public Builder addCleaner(ResponseTextCleaner cleaner) { + Assert.notNull(cleaner, "cleaner cannot be null"); + this.cleaners.add(cleaner); + return this; + } + + /** + * Build the composite cleaner. + * @return a new composite cleaner instance + */ + public CompositeResponseTextCleaner build() { + return new CompositeResponseTextCleaner(this.cleaners); + } + + } + +} + diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java new file mode 100644 index 00000000000..d44fc08821c --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java @@ -0,0 +1,74 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.converter; + +/** + * A {@link ResponseTextCleaner} that removes markdown code block formatting from LLM + * responses. This cleaner handles: + *
    + *
  • {@code ```json ... ```}
  • + *
  • {@code ``` ... ```}
  • + *
+ * + * @author liugddx + * @since 1.0.0 + */ +public class MarkdownCodeBlockCleaner implements ResponseTextCleaner { + + @Override + public String clean(String text) { + if (text == null || text.isEmpty()) { + return text; + } + + // Trim leading and trailing whitespace first + text = text.trim(); + + // Check for and remove triple backticks + if (text.startsWith("```") && text.endsWith("```")) { + // Remove the first line if it contains "```json" or similar + String[] lines = text.split("\n", 2); + if (lines[0].trim().toLowerCase().startsWith("```")) { + // Extract language identifier if present + String firstLine = lines[0].trim(); + if (firstLine.length() > 3) { + // Has language identifier like ```json + text = lines.length > 1 ? lines[1] : ""; + } + else { + // Just ``` without language + text = text.substring(3); + } + } + else { + text = text.substring(3); + } + + // Remove trailing ``` + if (text.endsWith("```")) { + text = text.substring(0, text.length() - 3); + } + + // Trim again to remove any potential whitespace + text = text.trim(); + } + + return text; + } + +} + diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java new file mode 100644 index 00000000000..35cfbc9a277 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java @@ -0,0 +1,38 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Strategy for pre-processing the raw text of an LLM response before it is parsed.
 * Implementations each target one kind of unwanted pattern or formatting, so that
 * responses from different AI models can be normalised by different cleaners.
 *
 * <p>
 * This is a functional interface whose single abstract method is
 * {@link #clean(String)}; it can therefore be supplied as a lambda expression or a
 * method reference.
 *
 * @author liugddx
 * @since 1.0.0
 */
@FunctionalInterface
public interface ResponseTextCleaner {

	/**
	 * Remove unwanted patterns, tags, or formatting from the given text.
	 * @param text the raw text taken from the LLM response
	 * @return the cleaned text, ready to be parsed
	 */
	String clean(String text);

}
This + * cleaner supports multiple tag patterns to handle different AI models: + *
    + *
  • Amazon Nova: {@code ...}
  • + *
  • Qwen models: {@code ...}
  • + *
  • DeepSeek models: various thinking patterns
  • + *
  • Claude models: thinking blocks in different formats
  • + *
+ * + * @author liugddx + * @since 1.0.0 + */ +public class ThinkingTagCleaner implements ResponseTextCleaner { + + /** + * Default thinking tag patterns used by common AI models. + */ + private static final List DEFAULT_PATTERNS = Arrays.asList( + // Amazon Nova: ... + Pattern.compile("(?s).*?\\s*", Pattern.CASE_INSENSITIVE), + // Qwen models: ... + Pattern.compile("(?s).*?\\s*", Pattern.CASE_INSENSITIVE), + // Alternative XML-style tags + Pattern.compile("(?s).*?\\s*", Pattern.CASE_INSENSITIVE), + // Markdown style thinking blocks + Pattern.compile("(?s)```thinking.*?```\\s*", Pattern.CASE_INSENSITIVE), + // Some models use comment-style + Pattern.compile("(?s)\\s*", Pattern.CASE_INSENSITIVE)); + + private final List patterns; + + /** + * Creates a cleaner with default thinking tag patterns. + */ + public ThinkingTagCleaner() { + this(DEFAULT_PATTERNS); + } + + /** + * Creates a cleaner with custom patterns. + * @param patterns the list of regex patterns to match thinking tags + */ + public ThinkingTagCleaner(List patterns) { + Assert.notNull(patterns, "patterns cannot be null"); + Assert.notEmpty(patterns, "patterns cannot be empty"); + this.patterns = new ArrayList<>(patterns); + } + + /** + * Creates a cleaner with custom pattern strings. + * @param patternStrings the list of regex pattern strings to match thinking tags + */ + public ThinkingTagCleaner(String... 
patternStrings) { + Assert.notNull(patternStrings, "patternStrings cannot be null"); + Assert.notEmpty(patternStrings, "patternStrings cannot be empty"); + this.patterns = new ArrayList<>(); + for (String patternString : patternStrings) { + this.patterns.add(Pattern.compile(patternString, Pattern.CASE_INSENSITIVE)); + } + } + + @Override + public String clean(String text) { + if (text == null || text.isEmpty()) { + return text; + } + + String result = text; + for (Pattern pattern : this.patterns) { + result = pattern.matcher(result).replaceAll(""); + } + return result; + } + + /** + * Creates a builder for constructing a thinking tag cleaner. + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for {@link ThinkingTagCleaner}. + */ + public static final class Builder { + + private final List patterns = new ArrayList<>(DEFAULT_PATTERNS); + + private boolean useDefaultPatterns = true; + + private Builder() { + } + + /** + * Disable default patterns. Only custom patterns added via + * {@link #addPattern(String)} or {@link #addPattern(Pattern)} will be used. + * @return this builder + */ + public Builder withoutDefaultPatterns() { + this.useDefaultPatterns = false; + return this; + } + + /** + * Add a custom pattern string. + * @param patternString the regex pattern string + * @return this builder + */ + public Builder addPattern(String patternString) { + Assert.hasText(patternString, "patternString cannot be empty"); + if (!this.useDefaultPatterns) { + this.patterns.clear(); + this.useDefaultPatterns = true; // Reset flag after first custom pattern + } + this.patterns.add(Pattern.compile(patternString, Pattern.CASE_INSENSITIVE)); + return this; + } + + /** + * Add a custom pattern. 
+ * @param pattern the regex pattern + * @return this builder + */ + public Builder addPattern(Pattern pattern) { + Assert.notNull(pattern, "pattern cannot be null"); + if (!this.useDefaultPatterns) { + this.patterns.clear(); + this.useDefaultPatterns = true; // Reset flag after first custom pattern + } + this.patterns.add(pattern); + return this; + } + + /** + * Build the thinking tag cleaner. + * @return a new thinking tag cleaner instance + */ + public ThinkingTagCleaner build() { + return new ThinkingTagCleaner(this.patterns); + } + + } + +} + diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java new file mode 100644 index 00000000000..69c5b257310 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java @@ -0,0 +1,33 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.converter; + +/** + * A {@link ResponseTextCleaner} that trims leading and trailing whitespace from text. + * + * @author liugddx + * @since 1.0.0 + */ +public class WhitespaceCleaner implements ResponseTextCleaner { + + @Override + public String clean(String text) { + return text != null ? 
text.trim() : text; + } + +} + diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java index faeef60700f..a92782dced4 100644 --- a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java @@ -273,16 +273,98 @@ void convertWithThinkingTagsAndMarkdownCodeBlock() { assertThat(testClass.getSomeString()).isEqualTo("some value"); } - @Test - void convertWithMultipleThinkingTags() { - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithThinkingTags = "First thoughtSecond thought{ \"someString\": \"some value\" }"; - var testClass = converter.convert(textWithThinkingTags); - assertThat(testClass.getSomeString()).isEqualTo("some value"); - } + @Test + void convertWithMultipleThinkingTags() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = "First thoughtSecond thought{ \"someString\": \"some value\" }"; + var testClass = converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } + + @Test + void convertWithQwenThinkTags() { + // Test Qwen model format: ... 
+ var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkTags = "Let me analyze this...{ \"someString\": \"qwen test\" }"; + var testClass = converter.convert(textWithThinkTags); + assertThat(testClass.getSomeString()).isEqualTo("qwen test"); + } + + @Test + void convertWithQwenThinkTagsMultiline() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkTags = """ + + Analyzing the request step by step + First, I need to understand the schema + Then generate the JSON + + { "someString": "qwen multiline" } + """; + var testClass = converter.convert(textWithThinkTags); + assertThat(testClass.getSomeString()).isEqualTo("qwen multiline"); + } + + @Test + void convertWithMixedThinkingAndThinkTags() { + // Test mixed format from different models + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithMixedTags = "Nova reasoningQwen analysis{ \"someString\": \"mixed test\" }"; + var testClass = converter.convert(textWithMixedTags); + assertThat(testClass.getSomeString()).isEqualTo("mixed test"); + } + + @Test + void convertWithReasoningTags() { + // Test alternative reasoning tags + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithReasoningTags = "Internal reasoning process{ \"someString\": \"reasoning test\" }"; + var testClass = converter.convert(textWithReasoningTags); + assertThat(testClass.getSomeString()).isEqualTo("reasoning test"); + } + + @Test + void convertWithMarkdownThinkingBlock() { + // Test markdown-style thinking block + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithMarkdownThinking = """ + ```thinking + This is a markdown-style thinking block + Used by some models + ``` + { "someString": "markdown thinking" } + """; + var testClass = converter.convert(textWithMarkdownThinking); + assertThat(testClass.getSomeString()).isEqualTo("markdown thinking"); + } + @Test + void convertWithCaseInsensitiveTags() { + // Test case 
insensitive tag matching + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithUpperCaseTags = "UPPERCASE THINKING{ \"someString\": \"case test\" }"; + var testClass = converter.convert(textWithUpperCaseTags); + assertThat(testClass.getSomeString()).isEqualTo("case test"); } + @Test + void convertWithComplexNestedStructure() { + // Test complex scenario with multiple formats combined + var converter = new BeanOutputConverter<>(TestClass.class); + String complexText = """ + Nova model reasoning + Qwen model analysis + + ```json + { "someString": "complex test" } + ``` + """; + var testClass = converter.convert(complexText); + assertThat(testClass.getSomeString()).isEqualTo("complex test"); + } + +} + // @checkstyle:off RegexpSinglelineJavaCheck @Nested class FormatTest { diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java new file mode 100644 index 00000000000..70949be350b --- /dev/null +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java @@ -0,0 +1,106 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.converter; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link CompositeResponseTextCleaner}. + * + * @author liugddx + */ +class CompositeResponseTextCleanerTest { + + @Test + void shouldApplyCleanersInOrder() { + var cleaner = CompositeResponseTextCleaner.builder() + .addCleaner(text -> text.replace("A", "B")) + .addCleaner(text -> text.replace("B", "C")) + .build(); + + String result = cleaner.clean("AAA"); + assertThat(result).isEqualTo("CCC"); + } + + @Test + void shouldWorkWithSingleCleaner() { + var cleaner = new CompositeResponseTextCleaner(text -> text.trim()); + String result = cleaner.clean(" content "); + assertThat(result).isEqualTo("content"); + } + + @Test + void shouldWorkWithMultipleCleaners() { + var cleaner = new CompositeResponseTextCleaner(new WhitespaceCleaner(), new ThinkingTagCleaner(), + new MarkdownCodeBlockCleaner()); + + String input = """ + Reasoning + ```json + {"key": "value"} + ``` + """; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("{\"key\": \"value\"}"); + } + + @Test + void shouldHandleComplexPipeline() { + var cleaner = CompositeResponseTextCleaner.builder() + .addCleaner(new WhitespaceCleaner()) + .addCleaner(new ThinkingTagCleaner()) + .addCleaner(new MarkdownCodeBlockCleaner()) + .addCleaner(new WhitespaceCleaner()) + .build(); + + String input = """ + + Let me analyze this + Qwen style thinking + + ```json + { + "result": "test" + } + ``` + + """; + + String result = cleaner.clean(input); + assertThat(result).isEqualTo("{\n\t\"result\": \"test\"\n}"); + } + + @Test + void shouldThrowExceptionWhenCleanersIsNull() { + assertThatThrownBy(() -> CompositeResponseTextCleaner.builder().addCleaner(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("cleaner cannot be null"); + } + + @Test + void 
shouldHandleEmptyCleanersList() { + var cleaner = new CompositeResponseTextCleaner(); + String input = "test content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo(input); + } + +} + diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java new file mode 100644 index 00000000000..4faed0a4d0a --- /dev/null +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java @@ -0,0 +1,155 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.converter; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link ThinkingTagCleaner}. 
+ * + * @author liugddx + */ +class ThinkingTagCleanerTest { + + @Test + void shouldRemoveAmazonNovaThinkingTags() { + var cleaner = new ThinkingTagCleaner(); + String input = "My reasoning processActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldRemoveQwenThinkTags() { + var cleaner = new ThinkingTagCleaner(); + String input = "Let me think about thisActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldRemoveReasoningTags() { + var cleaner = new ThinkingTagCleaner(); + String input = "Step by step reasoningActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldRemoveMultilineThinkingTags() { + var cleaner = new ThinkingTagCleaner(); + String input = """ + + Line 1 of thinking + Line 2 of thinking + + Actual content"""; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldRemoveMultipleThinkingTags() { + var cleaner = new ThinkingTagCleaner(); + String input = "FirstSecondThirdActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldBeCaseInsensitive() { + var cleaner = new ThinkingTagCleaner(); + String input = "UPPER CASEActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldRemoveMarkdownThinkingBlocks() { + var cleaner = new ThinkingTagCleaner(); + String input = """ + ```thinking + This is markdown thinking + ``` + Actual content"""; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldHandleEmptyInput() { + var cleaner = new ThinkingTagCleaner(); + assertThat(cleaner.clean("")).isEmpty(); + assertThat(cleaner.clean(null)).isNull(); + } + + 
@Test + void shouldHandleContentWithoutTags() { + var cleaner = new ThinkingTagCleaner(); + String input = "Just regular content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo(input); + } + + @Test + void shouldSupportCustomPatterns() { + var cleaner = new ThinkingTagCleaner("(?s).*?\\s*"); + String input = "Custom tag contentActual content"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Actual content"); + } + + @Test + void shouldSupportBuilderWithoutDefaultPatterns() { + var cleaner = ThinkingTagCleaner.builder() + .withoutDefaultPatterns() + .addPattern("(?s).*?\\s*") + .build(); + + String input = "Should remainShould be removedContent"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Should remainContent"); + } + + @Test + void shouldSupportBuilderWithAdditionalPatterns() { + var cleaner = ThinkingTagCleaner.builder().addPattern("(?s).*?\\s*").build(); + + String input = "RemovedAlso removedContent"; + String result = cleaner.clean(input); + assertThat(result).isEqualTo("Content"); + } + + @Test + void shouldThrowExceptionWhenPatternsAreNull() { + assertThatThrownBy(() -> new ThinkingTagCleaner((String[]) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("patternStrings cannot be null"); + } + + @Test + void shouldThrowExceptionWhenPatternsAreEmpty() { + assertThatThrownBy(() -> new ThinkingTagCleaner(new String[0])) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("patternStrings cannot be empty"); + } + +} + From 89278794a7f6f871168fce652e402ad7acd792c4 Mon Sep 17 00:00:00 2001 From: liugddx Date: Mon, 20 Oct 2025 08:37:13 +0800 Subject: [PATCH 4/6] feat: enhance BeanOutputConverter with customizable text cleaning capabilities Signed-off-by: liugddx --- .../CompositeResponseTextCleaner.java | 1 - .../converter/MarkdownCodeBlockCleaner.java | 1 - .../ai/converter/ResponseTextCleaner.java | 1 - 
.../ai/converter/ThinkingTagCleaner.java | 1 - .../ai/converter/WhitespaceCleaner.java | 1 - .../ai/converter/BeanOutputConverterTest.java | 166 +++++++++--------- .../CompositeResponseTextCleanerTest.java | 11 +- .../ai/converter/ThinkingTagCleanerTest.java | 7 +- 8 files changed, 90 insertions(+), 99 deletions(-) diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java index 5b07518190c..6bac5d20ac8 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java @@ -106,4 +106,3 @@ public CompositeResponseTextCleaner build() { } } - diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java index d44fc08821c..a792750211c 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java @@ -71,4 +71,3 @@ public String clean(String text) { } } - diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java index 35cfbc9a277..0a06c9940b8 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java @@ -35,4 +35,3 @@ public interface ResponseTextCleaner { String clean(String text); } - diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java 
b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java index aa765afc069..19014beb25c 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java @@ -169,4 +169,3 @@ public ThinkingTagCleaner build() { } } - diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java index 69c5b257310..1ebd3961e66 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java @@ -30,4 +30,3 @@ public String clean(String text) { } } - diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java index a92782dced4..9066aa2c814 100644 --- a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java @@ -273,97 +273,97 @@ void convertWithThinkingTagsAndMarkdownCodeBlock() { assertThat(testClass.getSomeString()).isEqualTo("some value"); } - @Test - void convertWithMultipleThinkingTags() { - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithThinkingTags = "First thoughtSecond thought{ \"someString\": \"some value\" }"; - var testClass = converter.convert(textWithThinkingTags); - assertThat(testClass.getSomeString()).isEqualTo("some value"); - } + @Test + void convertWithMultipleThinkingTags() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkingTags = "First thoughtSecond thought{ \"someString\": \"some value\" }"; + var testClass = 
converter.convert(textWithThinkingTags); + assertThat(testClass.getSomeString()).isEqualTo("some value"); + } - @Test - void convertWithQwenThinkTags() { - // Test Qwen model format: ... - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithThinkTags = "Let me analyze this...{ \"someString\": \"qwen test\" }"; - var testClass = converter.convert(textWithThinkTags); - assertThat(testClass.getSomeString()).isEqualTo("qwen test"); - } + @Test + void convertWithQwenThinkTags() { + // Test Qwen model format: ... + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkTags = "Let me analyze this...{ \"someString\": \"qwen test\" }"; + var testClass = converter.convert(textWithThinkTags); + assertThat(testClass.getSomeString()).isEqualTo("qwen test"); + } - @Test - void convertWithQwenThinkTagsMultiline() { - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithThinkTags = """ - - Analyzing the request step by step - First, I need to understand the schema - Then generate the JSON - - { "someString": "qwen multiline" } - """; - var testClass = converter.convert(textWithThinkTags); - assertThat(testClass.getSomeString()).isEqualTo("qwen multiline"); - } + @Test + void convertWithQwenThinkTagsMultiline() { + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithThinkTags = """ + + Analyzing the request step by step + First, I need to understand the schema + Then generate the JSON + + { "someString": "qwen multiline" } + """; + var testClass = converter.convert(textWithThinkTags); + assertThat(testClass.getSomeString()).isEqualTo("qwen multiline"); + } - @Test - void convertWithMixedThinkingAndThinkTags() { - // Test mixed format from different models - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithMixedTags = "Nova reasoningQwen analysis{ \"someString\": \"mixed test\" }"; - var testClass = converter.convert(textWithMixedTags); - 
assertThat(testClass.getSomeString()).isEqualTo("mixed test"); - } + @Test + void convertWithMixedThinkingAndThinkTags() { + // Test mixed format from different models + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithMixedTags = "Nova reasoningQwen analysis{ \"someString\": \"mixed test\" }"; + var testClass = converter.convert(textWithMixedTags); + assertThat(testClass.getSomeString()).isEqualTo("mixed test"); + } - @Test - void convertWithReasoningTags() { - // Test alternative reasoning tags - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithReasoningTags = "Internal reasoning process{ \"someString\": \"reasoning test\" }"; - var testClass = converter.convert(textWithReasoningTags); - assertThat(testClass.getSomeString()).isEqualTo("reasoning test"); - } + @Test + void convertWithReasoningTags() { + // Test alternative reasoning tags + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithReasoningTags = "Internal reasoning process{ \"someString\": \"reasoning test\" }"; + var testClass = converter.convert(textWithReasoningTags); + assertThat(testClass.getSomeString()).isEqualTo("reasoning test"); + } - @Test - void convertWithMarkdownThinkingBlock() { - // Test markdown-style thinking block - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithMarkdownThinking = """ - ```thinking - This is a markdown-style thinking block - Used by some models - ``` - { "someString": "markdown thinking" } - """; - var testClass = converter.convert(textWithMarkdownThinking); - assertThat(testClass.getSomeString()).isEqualTo("markdown thinking"); - } + @Test + void convertWithMarkdownThinkingBlock() { + // Test markdown-style thinking block + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithMarkdownThinking = """ + ```thinking + This is a markdown-style thinking block + Used by some models + ``` + { "someString": "markdown thinking" } + """; + var 
testClass = converter.convert(textWithMarkdownThinking); + assertThat(testClass.getSomeString()).isEqualTo("markdown thinking"); + } - @Test - void convertWithCaseInsensitiveTags() { - // Test case insensitive tag matching - var converter = new BeanOutputConverter<>(TestClass.class); - String textWithUpperCaseTags = "UPPERCASE THINKING{ \"someString\": \"case test\" }"; - var testClass = converter.convert(textWithUpperCaseTags); - assertThat(testClass.getSomeString()).isEqualTo("case test"); - } + @Test + void convertWithCaseInsensitiveTags() { + // Test case insensitive tag matching + var converter = new BeanOutputConverter<>(TestClass.class); + String textWithUpperCaseTags = "UPPERCASE THINKING{ \"someString\": \"case test\" }"; + var testClass = converter.convert(textWithUpperCaseTags); + assertThat(testClass.getSomeString()).isEqualTo("case test"); + } - @Test - void convertWithComplexNestedStructure() { - // Test complex scenario with multiple formats combined - var converter = new BeanOutputConverter<>(TestClass.class); - String complexText = """ - Nova model reasoning - Qwen model analysis - - ```json - { "someString": "complex test" } - ``` - """; - var testClass = converter.convert(complexText); - assertThat(testClass.getSomeString()).isEqualTo("complex test"); - } + @Test + void convertWithComplexNestedStructure() { + // Test complex scenario with multiple formats combined + var converter = new BeanOutputConverter<>(TestClass.class); + String complexText = """ + Nova model reasoning + Qwen model analysis -} + ```json + { "someString": "complex test" } + ``` + """; + var testClass = converter.convert(complexText); + assertThat(testClass.getSomeString()).isEqualTo("complex test"); + } + + } // @checkstyle:off RegexpSinglelineJavaCheck @Nested diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java 
index 70949be350b..7cf0ebc54c1 100644 --- a/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java @@ -52,11 +52,11 @@ void shouldWorkWithMultipleCleaners() { new MarkdownCodeBlockCleaner()); String input = """ - Reasoning - ```json - {"key": "value"} - ``` - """; + Reasoning + ```json + {"key": "value"} + ``` + """; String result = cleaner.clean(input); assertThat(result).isEqualTo("{\"key\": \"value\"}"); } @@ -103,4 +103,3 @@ void shouldHandleEmptyCleanersList() { } } - diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java index 4faed0a4d0a..a8d596f337f 100644 --- a/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java @@ -139,17 +139,14 @@ void shouldSupportBuilderWithAdditionalPatterns() { @Test void shouldThrowExceptionWhenPatternsAreNull() { - assertThatThrownBy(() -> new ThinkingTagCleaner((String[]) null)) - .isInstanceOf(IllegalArgumentException.class) + assertThatThrownBy(() -> new ThinkingTagCleaner((String[]) null)).isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("patternStrings cannot be null"); } @Test void shouldThrowExceptionWhenPatternsAreEmpty() { - assertThatThrownBy(() -> new ThinkingTagCleaner(new String[0])) - .isInstanceOf(IllegalArgumentException.class) + assertThatThrownBy(() -> new ThinkingTagCleaner(new String[0])).isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("patternStrings cannot be empty"); } } - From ad284711a59dca1dc411ce9d65b54e29aceaf2f8 Mon Sep 17 00:00:00 2001 From: liugddx Date: Tue, 21 Oct 2025 22:59:32 +0800 Subject: [PATCH 5/6] refactor: address review feedback - 
update version and optimize performance - Update @since to 1.1.0 as suggested by nicolaskrier - Add fast-path optimization in ThinkingTagCleaner for non-thinking models - Enhance documentation explaining why ThinkingTagCleaner is safe to use by default - Clarify performance characteristics in JavaDoc Addresses review comments from nicolaskrier in PR #4667 Signed-off-by: liugddx --- .../ai/converter/BeanOutputConverter.java | 13 +++++++++++++ .../CompositeResponseTextCleaner.java | 2 +- .../ai/converter/MarkdownCodeBlockCleaner.java | 2 +- .../ai/converter/ResponseTextCleaner.java | 2 +- .../ai/converter/ThinkingTagCleaner.java | 18 ++++++++++++++++-- .../ai/converter/WhitespaceCleaner.java | 2 +- 6 files changed, 33 insertions(+), 6 deletions(-) diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java index 251f5116458..fc5cb61e1a0 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java @@ -158,6 +158,19 @@ private BeanOutputConverter(Type type, ObjectMapper objectMapper, ResponseTextCl /** * Creates the default text cleaner that handles common response formats from various * AI models. + *

<p> + * The default cleaner includes: + *
<ul> + *
<li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon + * Nova and Qwen. For models that don't generate thinking tags, this has minimal + * performance impact due to fast-path optimization.</li> + *
<li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block + * formatting.</li> + *
<li>{@link WhitespaceCleaner} - Trims whitespace.</li> + *
</ul> + *
<p>
+ * To customize the cleaning behavior, provide a custom {@link ResponseTextCleaner} + * via the constructor. * @return a composite text cleaner with default cleaning strategies */ private static ResponseTextCleaner createDefaultTextCleaner() { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java index 6bac5d20ac8..d1def873f82 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java @@ -27,7 +27,7 @@ * This allows for a flexible pipeline of text cleaning operations. * * @author liugddx - * @since 1.0.0 + * @since 1.1.0 */ public class CompositeResponseTextCleaner implements ResponseTextCleaner { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java index a792750211c..cd81634fc55 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java @@ -25,7 +25,7 @@ * * * @author liugddx - * @since 1.0.0 + * @since 1.1.0 */ public class MarkdownCodeBlockCleaner implements ResponseTextCleaner { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java index 0a06c9940b8..866b495b4f4 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java @@ -22,7 +22,7 @@ * models. 
* * @author liugddx - * @since 1.0.0 + * @since 1.1.0 */ @FunctionalInterface public interface ResponseTextCleaner { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java index 19014beb25c..ff92f11cae6 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java @@ -32,9 +32,13 @@ *

<li>DeepSeek models: various thinking patterns</li> *
<li>Claude models: thinking blocks in different formats</li> *
</ul> + *
<p>
    + * Performance: This cleaner includes fast-path optimization. For responses + * without thinking tags (most models), it performs a quick character check and returns + * immediately, making it safe to use as a default cleaner even for non-thinking models. * * @author liugddx - * @since 1.0.0 + * @since 1.1.0 */ public class ThinkingTagCleaner implements ResponseTextCleaner { @@ -91,9 +95,19 @@ public String clean(String text) { return text; } + // Fast path: if text doesn't contain '<' character, no tags to remove + if (!text.contains("<") && !text.contains("`")) { + return text; + } + String result = text; for (Pattern pattern : this.patterns) { - result = pattern.matcher(result).replaceAll(""); + String afterReplacement = pattern.matcher(result).replaceAll(""); + // If replacement occurred, update result and continue checking other patterns + // (since multiple tag types might coexist) + if (!afterReplacement.equals(result)) { + result = afterReplacement; + } } return result; } diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java index 1ebd3961e66..1c52a6c0421 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java @@ -20,7 +20,7 @@ * A {@link ResponseTextCleaner} that trims leading and trailing whitespace from text. 
* * @author liugddx - * @since 1.0.0 + * @since 1.1.0 */ public class WhitespaceCleaner implements ResponseTextCleaner { From 675f27dc76866f567873d1e5d91fadfafa727ec8 Mon Sep 17 00:00:00 2001 From: liugddx Date: Tue, 21 Oct 2025 23:06:17 +0800 Subject: [PATCH 6/6] fix: improve documentation for ThinkingTagCleaner and BeanOutputConverter Signed-off-by: liugddx --- .../springframework/ai/converter/BeanOutputConverter.java | 7 +++---- .../springframework/ai/converter/ThinkingTagCleaner.java | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java index fc5cb61e1a0..889392f92ba 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java @@ -161,11 +161,10 @@ private BeanOutputConverter(Type type, ObjectMapper objectMapper, ResponseTextCl *

<p> * The default cleaner includes: *
<ul> - *
<li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon - * Nova and Qwen. For models that don't generate thinking tags, this has minimal + *
<li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon Nova + * and Qwen. For models that don't generate thinking tags, this has minimal * performance impact due to fast-path optimization.</li> - *
<li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block - * formatting.</li> + *
<li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block formatting.</li> *
<li>{@link WhitespaceCleaner} - Trims whitespace.</li> *
</ul> *
<p>
    diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java index ff92f11cae6..11892b48151 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java @@ -33,8 +33,8 @@ *

<li>Claude models: thinking blocks in different formats</li> *
</ul> *
<p>
    - * Performance: This cleaner includes fast-path optimization. For responses - * without thinking tags (most models), it performs a quick character check and returns + * Performance: This cleaner includes fast-path optimization. For responses without + * thinking tags (most models), it performs a quick character check and returns * immediately, making it safe to use as a default cleaner even for non-thinking models. * * @author liugddx