From 77f281267ca0b0a6fff5451f9e5ab52ce22b177b Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 12 Aug 2025 13:06:06 +0200 Subject: [PATCH] datastore: add smart splitter that would consider quotes, #TASK-7881 --- .../commons/datastore/core/ObjectMap.java | 1 - .../commons/datastore/core/ObjectMapTest.java | 31 --------- .../datastore/mongodb/MongoDBQueryUtils.java | 58 ++++++++++++++++- .../datastore/mongodb/SmartSplitTest.java | 63 +++++++++++++++++++ 4 files changed, 118 insertions(+), 35 deletions(-) create mode 100644 commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/SmartSplitTest.java diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/ObjectMap.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/ObjectMap.java index 743b610f0..dd1beec33 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/ObjectMap.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/ObjectMap.java @@ -38,7 +38,6 @@ public class ObjectMap implements Map, Serializable { private static final Pattern KEY_SPLIT_PATTERN = Pattern.compile("(^[^\\[\\].]+(?:\\[[^\\]]+\\])?)(?:\\.(.*))*"); private static final Pattern LIST_FILTER_PATTERN = Pattern.compile("([^\\[\\]]+)\\[([^=]*?)(?:[=]?)([^=]+)\\]$"); - public static final Pattern COMMA_SEPARATED_LIST_SPLIT_PATTERN = Pattern.compile("((?:(?!,\\S).)+)+"); public ObjectMap() { objectMap = new LinkedHashMap<>(); diff --git a/commons-datastore/commons-datastore-core/src/test/java/org/opencb/commons/datastore/core/ObjectMapTest.java b/commons-datastore/commons-datastore-core/src/test/java/org/opencb/commons/datastore/core/ObjectMapTest.java index 15f4e4e40..d0098015b 100644 --- a/commons-datastore/commons-datastore-core/src/test/java/org/opencb/commons/datastore/core/ObjectMapTest.java +++ b/commons-datastore/commons-datastore-core/src/test/java/org/opencb/commons/datastore/core/ObjectMapTest.java @@ -263,35 +263,4 @@ public void testGetWithFilterFromList() { assertEquals("CGHI", objectMap.get("nestedList[nested.value=G].nested.list[id=Cghi].name")); } - @Test - public void testPatternListSplit() { - List originalValues = Arrays.asList("disorder1", "disorder2, blabla", "disorder3"); - objectMap.put("key", StringUtils.join(originalValues, ",")); - objectMap.put("key1", ""); - objectMap.put("key2", "my value"); - objectMap.put("key3", Arrays.asList("1", "2")); - objectMap.put("key4", Arrays.asList(1, 2)); - - List values = objectMap.getAsStringList("key", ObjectMap.COMMA_SEPARATED_LIST_SPLIT_PATTERN); - assertEquals(originalValues.size(), values.size()); - assertTrue(originalValues.containsAll(values)); - - values = objectMap.getAsStringList("key1", ObjectMap.COMMA_SEPARATED_LIST_SPLIT_PATTERN); - assertEquals(1, values.size()); - assertEquals("", values.get(0)); - - values = objectMap.getAsStringList("key2", ObjectMap.COMMA_SEPARATED_LIST_SPLIT_PATTERN); - assertEquals(1, values.size()); - assertEquals("my value", values.get(0)); - - values = objectMap.getAsStringList("key3", ObjectMap.COMMA_SEPARATED_LIST_SPLIT_PATTERN); - assertEquals(2, values.size()); - assertEquals("1", values.get(0)); - assertEquals("2", values.get(1)); - - values = objectMap.getAsStringList("key4", ObjectMap.COMMA_SEPARATED_LIST_SPLIT_PATTERN); - assertEquals(2, values.size()); - assertEquals("1", values.get(0)); - assertEquals("2", values.get(1)); - } } diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index e80265d53..a57e79cf6 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -36,9 +36,9 @@ import static com.mongodb.client.model.Aggregates.*; import static com.mongodb.client.model.Projections.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.bucket; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.count; -import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** * Created by imedina on 17/01/16. @@ -178,6 +178,53 @@ public static Bson createFilter(String mongoDbField, String queryParam, Query qu return filter; } + /** + * Splits a string by the given separator, handling quoted values properly. + * Quoted values can contain the separator character without being split. + * Removes surrounding quotes and trims whitespace from each value. + * + * @param input the input string to split + * @param separator the separator to split by ("," or ";") + * @return list of trimmed, unquoted values + */ + public static List smartSplit(String input, String separator) { + List result = new ArrayList<>(); + if (input == null || input.isEmpty()) { + return result; + } + + boolean inQuotes = false; + StringBuilder currentValue = new StringBuilder(); + + for (int i = 0; i < input.length(); i++) { + char c = input.charAt(i); + + if (c == '"') { + inQuotes = !inQuotes; + } else if (!inQuotes && input.substring(i).startsWith(separator)) { + // Found separator outside quotes + String value = currentValue.toString().trim(); + if (value.startsWith("\"") && value.endsWith("\"") && value.length() > 1) { + value = value.substring(1, value.length() - 1); + } + result.add(value.trim()); + currentValue = new StringBuilder(); + i += separator.length() - 1; // Skip the separator + } else { + currentValue.append(c); + } + } + + // Add the last value + String value = currentValue.toString().trim(); + if (value.startsWith("\"") && value.endsWith("\"") && value.length() > 1) { + value = value.substring(1, value.length() - 1); + } + result.add(value.trim()); + + return result; + } + private static String getLogicalSeparator(LogicalOperator operator) { return (operator != null && operator.equals(LogicalOperator.AND)) ? AND : OR; } @@ -258,8 +305,13 @@ protected static String getOp2(String op, String value) { public static Bson createAutoFilter(String mongoDbField, String queryParam, Query query, QueryParam.Type type, LogicalOperator operator) throws NumberFormatException { - - List queryParamList = query.getAsStringList(queryParam, getLogicalSeparator(operator)); + List queryParamList; + String value = query.getString(queryParam); + if (StringUtils.isNotEmpty(value) && value.contains("\"")) { + queryParamList = smartSplit(value, getLogicalSeparator(operator)); + } else { + queryParamList = query.getAsStringList(queryParam, getLogicalSeparator(operator)); + } return createAutoFilter(mongoDbField, queryParam, type, operator, queryParamList); } diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/SmartSplitTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/SmartSplitTest.java new file mode 100644 index 000000000..59a5b2c2b --- /dev/null +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/SmartSplitTest.java @@ -0,0 +1,63 @@ +package org.opencb.commons.datastore.mongodb; + +import org.junit.Test; +import java.util.List; +import static org.junit.Assert.assertEquals; + +public class SmartSplitTest { + + @Test + public void testSmartSplitWithQuotedCommaValues() { + String input = "\"a \",\" b\",\" c \""; + List result = MongoDBQueryUtils.smartSplit(input, ","); + + assertEquals(3, result.size()); + assertEquals("a", result.get(0)); + assertEquals("b", result.get(1)); + assertEquals("c", result.get(2)); + } + + @Test + public void testSmartSplitWithQuotedSemicolonValues() { + String input = "\"a \";\" b\";\" c \""; + List result = MongoDBQueryUtils.smartSplit(input, ";"); + + assertEquals(3, result.size()); + assertEquals("a", result.get(0)); + assertEquals("b", result.get(1)); + assertEquals("c", result.get(2)); + } + + @Test + public void testSmartSplitWithNonQuotedValues() { + String input = "a,b,c"; + List result = MongoDBQueryUtils.smartSplit(input, ","); + + assertEquals(3, result.size()); + assertEquals("a", result.get(0)); + assertEquals("b", result.get(1)); + assertEquals("c", result.get(2)); + } + + @Test + public void testSmartSplitWithNonQuotedValuesAndSpaces() { + String input = "a, b,c "; + List result = MongoDBQueryUtils.smartSplit(input, ","); + + assertEquals(3, result.size()); + assertEquals("a", result.get(0)); + assertEquals("b", result.get(1)); + assertEquals("c", result.get(2)); + } + + @Test + public void testSmartSplitWithMixedValues() { + String input = "\"a, with comma\",b,\"c\""; + List result = MongoDBQueryUtils.smartSplit(input, ","); + + assertEquals(3, result.size()); + assertEquals("a, with comma", result.get(0)); + assertEquals("b", result.get(1)); + assertEquals("c", result.get(2)); + } +}