Skip to content

Commit

Permalink
fix molgenis#9386: implement custom _all field for search-all feature
Browse files Browse the repository at this point in the history
  • Loading branch information
tommydeboer committed Mar 24, 2022
1 parent 458acd3 commit 22029e5
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import static org.molgenis.api.tests.utils.RestTestUtils.removePackages;
import static org.molgenis.api.tests.utils.RestTestUtils.uploadEmxFile;
import static org.molgenis.api.tests.utils.RestTestUtils.waitForIndexJobs;
import static org.slf4j.LoggerFactory.getLogger;

import io.restassured.response.ValidatableResponse;
import java.io.File;
Expand All @@ -32,11 +31,9 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.molgenis.api.tests.AbstractApiTests;
import org.slf4j.Logger;

@TestMethodOrder(OrderAnnotation.class)
class SearchAPIIT extends AbstractApiTests {
private static final Logger LOG = getLogger(SearchAPIIT.class);
private static String adminToken;

private static File createEMX() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"DT014" "code" "LIKE" "ORPHA:100" "45" "ORPHA:100,ORPHA:1000,ORPHA:100008" "Finds substrings at the start"
"DT015" "label" "LIKE" "lvé" "2" "ORPHA:2380,urn:miriam:icd:M91.1" "Finds substrings with accents"
"DT016" "label" "LIKE" "calve" "0" "Postgres ILIKE does not do ascii folding [SHOULD FIND MATCHES]"
"DT017" "*" "SEARCH_QUERY" "E26* -unspecified" "4" "urn:miriam:icd:E26,urn:miriam:icd:E26.0" "Simple query string search query searches across fields"
"DT017" "*" "SEARCH_QUERY" "E26* -unspecified" "4" "urn:miriam:icd:E26.0,urn:miriam:icd:E20-E35" "Simple query string search query searches across fields"
"DT018" "label" "SEARCH_QUERY" "aciduria type 3" "3" "without quotes, is independent of order"
"DT019" "label" "SEARCH_QUERY" "aciduria ""type 3""" "1" "with quotes, is phrase query"
"DT020" "*" "SEARCH_QUERY" "aciduria ""type 3""" "1" "with quotes, is phrase query"
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"MUT008" "*" "SEARCH" "c.6187C>G" "1" "M18" "Finds only the exact mutation"
"MUT009" "*" "SEARCH" "c.6081_6082dup" "1" "M104" "Finds only the exact mutation"
"MUT010" "*" "SEARCH" "c.6082G>T" "1" "M657" "Finds only the exact mutation"
"MUT011" "*" "SEARCH" "C>T" "0" "" "Search all is consistent with field search"
"MUT011" "*" "SEARCH" "C>T" "143" "M922" "C>T also finds the c. in c.5318G>T"
"MUT012" "*" "SEARCH" "c.6082" "0" "Does not find substrings [SHOULD FIND MATHCES]"
"MUT013" "cdna_notation" "LIKE" "c.6082" "2" "M67,M657" "LIKE finds substrings at the start"
"MUT014" "cdna_notation" "LIKE" "6082" "3" "M67,M657,M104" "LIKE finds substrings in the middle"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"SRV002" "*" "SEARCH" "8.3" "0" "Search does not find partial matches [SHOULD FIND MATCHES]"
"SRV003" "*" "SEARCH" "molgenis" "1" "molgenis23" "Finds molgenis in server name"
"SRV004" "*" "SEARCH" "https://molgenis.org/demo" "1" "molgenis09" "Finds server by exact URL"
"SRV005" "*" "SEARCH" "SNAPSHOT" "0" "" "Searching in all fields is consistent with single field"
"SRV005" "*" "SEARCH" "SNAPSHOT" "1" "molgenis19" "Tokenizer splits token field when it gets copied to all field"
"SRV006" "version" "SEARCH" "8.3.11" "1" "molgenis09" "Finds exact match of the server version"
"SRV007" "version" "SEARCH" "8.3" "0" "Search does not find partial matches [SHOULD FIND MATCHES]"
"SRV008" "name" "SEARCH" "molgenis" "1" "molgenis23" "Finds molgenis in server name"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import org.molgenis.data.elasticsearch.generator.model.Mapping;
import org.molgenis.util.UnexpectedEnumException;

/** Creates Elasticsearch client content for mappings. */
/** Creates Elasticsearch client content for mappings. An extra field "_all" is added to every index
* to replace the old search-all behavior that was removed in ES 7. Other fields add their content
* to this "_all" field using the "copy_to" parameter.*/
class MappingContentBuilder {

/**
Expand All @@ -29,6 +31,8 @@ class MappingContentBuilder {
public static final String FIELDS = "fields";
public static final String TYPE = "type";
public static final String INDEX = "index";
public static final String ALL_FIELD = "_all";
public static final String COPY_TO = "copy_to";

private final XContentType xContentType;

Expand All @@ -52,11 +56,12 @@ XContentBuilder createMapping(Mapping mapping) {
private void createMapping(Mapping mapping, XContentBuilder contentBuilder) throws IOException {
contentBuilder.startObject();
contentBuilder.startObject("_source").field("enabled", false).endObject();
createFieldMappings(mapping.getFieldMappings(), contentBuilder);
createFieldMappings(mapping.getFieldMappings(), contentBuilder, false);
contentBuilder.endObject();
}

private void createFieldMappings(List<FieldMapping> fieldMappings, XContentBuilder contentBuilder)
private void createFieldMappings(List<FieldMapping> fieldMappings, XContentBuilder contentBuilder,
boolean nested)
throws IOException {
contentBuilder.startObject("properties");
fieldMappings.forEach(
Expand All @@ -67,48 +72,36 @@ private void createFieldMappings(List<FieldMapping> fieldMappings, XContentBuild
throw new UncheckedIOException(e);
}
});

if (!nested) {
contentBuilder.startObject(ALL_FIELD).field("type", "text").endObject();
}

contentBuilder.endObject();
}

private void createFieldMapping(FieldMapping fieldMapping, XContentBuilder contentBuilder)
throws IOException {
contentBuilder.startObject(fieldMapping.getName());
switch (fieldMapping.getType()) {
case BOOLEAN:
createFieldMapping("boolean", contentBuilder);
break;
case DATE:
createFieldMappingDate("date", contentBuilder);
break;
case DATE_TIME:
createFieldMappingDate("date_time_no_millis", contentBuilder);
break;
case DOUBLE:
createFieldMapping("double", contentBuilder);
break;
case INTEGER:
createFieldMappingInteger(contentBuilder);
break;
case LONG:
createFieldMapping("long", contentBuilder);
break;
case NESTED:
createFieldMappingNested(fieldMapping.getNestedFieldMappings(), contentBuilder);
break;
case TEXT:
createFieldMappingText(contentBuilder, fieldMapping.isNeedsNgram());
break;
case KEYWORD:
createFieldMappingKeyword(fieldMapping, contentBuilder);
break;
default:
throw new UnexpectedEnumException(fieldMapping.getType());
case BOOLEAN -> createFieldMapping("boolean", contentBuilder);
case DATE -> createFieldMappingDate("date", contentBuilder);
case DATE_TIME -> createFieldMappingDate("date_time_no_millis", contentBuilder);
case DOUBLE -> createFieldMapping("double", contentBuilder);
case INTEGER -> createFieldMappingInteger(contentBuilder);
case LONG -> createFieldMapping("long", contentBuilder);
case NESTED -> createFieldMappingNested(fieldMapping.getNestedFieldMappings(),
contentBuilder);
case TEXT -> createFieldMappingText(contentBuilder, fieldMapping.isNeedsNgram());
case KEYWORD -> createFieldMappingKeyword(fieldMapping, contentBuilder);
default -> throw new UnexpectedEnumException(fieldMapping.getType());
}
contentBuilder.endObject();
}

private void createFieldMapping(String type, XContentBuilder contentBuilder) throws IOException {
contentBuilder.field(TYPE, type);
contentBuilder.field(COPY_TO, ALL_FIELD);
}

private void createFieldMappingDate(String dateFormat, XContentBuilder contentBuilder)
Expand All @@ -123,18 +116,20 @@ private void createFieldMappingDate(String dateFormat, XContentBuilder contentBu
.field(INDEX, true)
.endObject()
.endObject();
contentBuilder.field(COPY_TO, ALL_FIELD);
}

private void createFieldMappingInteger(XContentBuilder contentBuilder) throws IOException {
contentBuilder.field(TYPE, "integer");
// Fix sorting by using disk-based "fielddata" instead of in-memory "fielddata"
contentBuilder.field("doc_values", true);
contentBuilder.field(COPY_TO, ALL_FIELD);
}

private void createFieldMappingNested(
List<FieldMapping> nestedFieldMappings, XContentBuilder contentBuilder) throws IOException {
contentBuilder.field(TYPE, "nested");
createFieldMappings(nestedFieldMappings, contentBuilder);
createFieldMappings(nestedFieldMappings, contentBuilder, true);
}

private void createFieldMappingKeyword(FieldMapping fieldMapping, XContentBuilder contentBuilder)
Expand All @@ -151,6 +146,7 @@ private void createFieldMappingKeyword(FieldMapping fieldMapping, XContentBuilde
.field(INDEX, true)
.endObject();
fieldsObject.endObject();
contentBuilder.field(COPY_TO, ALL_FIELD);
}

private void createFieldMappingText(XContentBuilder contentBuilder, boolean needsNgram)
Expand All @@ -177,5 +173,6 @@ private void createFieldMappingText(XContentBuilder contentBuilder, boolean need
.endObject();
}
fieldsObject.endObject();
contentBuilder.field(COPY_TO, ALL_FIELD);
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
package org.molgenis.data.elasticsearch.generator;

import static java.lang.String.format;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
import static org.molgenis.data.QueryUtils.getAttributePathExpanded;
import static org.molgenis.data.util.EntityTypeUtils.isReferenceType;

import java.util.List;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.molgenis.data.MolgenisQueryException;
Expand All @@ -29,22 +25,14 @@ QueryBuilder mapQueryRule(QueryRule queryRule, EntityType entityType) {
}

if (queryRule.getField() == null) {
return createQueryClauseSearchAll(queryRule, entityType);
return createQueryClauseSearchAll(queryRule);
} else {
return createQueryClauseSearchAttribute(queryRule, entityType);
}
}

private QueryBuilder createQueryClauseSearchAll(QueryRule queryRule, EntityType entityType) {
var builder = QueryBuilders.boolQuery().should(multiMatchQuery(queryRule.getValue()));
var clauses = builder.should();
for (Attribute child : entityType.getAtomicAttributes()) {
if (isReferenceType(child.getDataType())) {
var fieldName = getQueryFieldName(child);
clauses.add(nestedQuery(fieldName, multiMatchQuery(queryRule.getValue()), ScoreMode.Max));
}
}
return builder;
private QueryBuilder createQueryClauseSearchAll(QueryRule queryRule) {
return QueryBuilders.matchPhraseQuery("_all", queryRule.getValue()).slop(10);
}

private QueryBuilder createQueryClauseSearchAttribute(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ QueryBuilder mapQueryRule(QueryRule queryRule, EntityType entityType) {

private QueryBuilder createQueryClauseSearchAll(QueryRule queryRule) {
return QueryBuilders.simpleQueryStringQuery(queryRule.getValue().toString())
.defaultOperator(org.elasticsearch.index.query.Operator.AND);
.defaultOperator(org.elasticsearch.index.query.Operator.AND)
.field("_all");
}

private QueryBuilder createQueryClauseSearchAttribute(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
/** Generates Elasticsearch queries from MOLGENIS queries. */
@Component
public class QueryGenerator {

static final String ATTRIBUTE_SEPARATOR = ".";

private final EnumMap<Operator, QueryClauseGenerator> queryClauseGeneratorMap;
Expand Down Expand Up @@ -83,8 +84,9 @@ private QueryBuilder createQueryBuilder(List<QueryRule> queryRules, EntityType e
} else if (i + 1 < nrQueryRules) {
QueryRule occurQueryRule = queryRules.get(i + 1);
QueryRule.Operator occurOperator = occurQueryRule.getOperator();
if (occurOperator == null)
if (occurOperator == null) {
throw new MolgenisQueryException("Missing expected occur operator");
}

//noinspection EnumSwitchStatementWhichMissesCases
switch (occurOperator) {
Expand All @@ -103,7 +105,9 @@ private QueryBuilder createQueryBuilder(List<QueryRule> queryRules, EntityType e
}

QueryBuilder queryPartBuilder = createQueryClause(queryRule, entityType);
if (queryPartBuilder == null) continue; // skip SHOULD and DIS_MAX query rules
if (queryPartBuilder == null) {
continue; // skip SHOULD and DIS_MAX query rules
}

//noinspection ConstantConditions
if (occur == null) {
Expand Down Expand Up @@ -142,8 +146,7 @@ private QueryBuilder createQueryClause(QueryRule queryRule, EntityType entityTyp
case AND:
case OR:
case NOT:
throw new MolgenisQueryException(
format("Unexpected query operator [%s]", queryOperator.toString()));
throw new MolgenisQueryException(format("Unexpected query operator [%s]", queryOperator));
default:
QueryClauseGenerator queryClauseGenerator = queryClauseGeneratorMap.get(queryOperator);
if (queryClauseGenerator == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import static java.util.Collections.singletonList;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -40,15 +39,15 @@ static Iterator<Object[]> createMappingProvider() {

@ParameterizedTest
@MethodSource("createMappingProvider")
void testCreateMapping(MappingType mappingType, String expectedJson) throws IOException {
void testCreateMapping(MappingType mappingType, String expectedJson) {
Mapping mapping =
createMapping(FieldMapping.builder().setName("field").setType(mappingType).build());
XContentBuilder xContentBuilder = mappingContentBuilder.createMapping(mapping);
assertEquals(expectedJson, xContentBuilder.getOutputStream().toString());
}

@Test
void testCreateMappingKeywordCaseSensitive() throws IOException {
void testCreateMappingKeywordCaseSensitive() {
Mapping mapping =
createMapping(
FieldMapping.builder()
Expand All @@ -62,7 +61,7 @@ void testCreateMappingKeywordCaseSensitive() throws IOException {
}

@Test
void testCreateMappingNested() throws IOException {
void testCreateMappingNested() {
FieldMapping nestedFieldMapping =
FieldMapping.builder().setName("nestedField").setType(MappingType.BOOLEAN).build();
Mapping mapping =
Expand All @@ -81,23 +80,23 @@ private static Mapping createMapping(FieldMapping fieldMapping) {
}

private static final String JSON_BOOLEAN =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"boolean\"}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"boolean\",\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_DATE =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"date\",\"format\":\"date\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"date\",\"format\":\"date\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}},\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_DATE_TIME =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"date\",\"format\":\"date_time_no_millis\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"date\",\"format\":\"date_time_no_millis\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}},\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_DOUBLE =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"double\"}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"double\",\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_INTEGER =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"integer\",\"doc_values\":true}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"integer\",\"doc_values\":true,\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_LONG =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"long\"}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"long\",\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_TEXT =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"text\",\"norms\":true,\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true,\"ignore_above\":8191}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"text\",\"norms\":true,\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true,\"ignore_above\":8191}},\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_KEYWORD =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"keyword\",\"normalizer\":\"lowercase_asciifold\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"keyword\",\"normalizer\":\"lowercase_asciifold\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}},\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_KEYWORD_CASE_SENSITIVE =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"keyword\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"keyword\",\"fields\":{\"raw\":{\"type\":\"keyword\",\"index\":true}},\"copy_to\":\"_all\"},\"_all\":{\"type\":\"text\"}}}";
private static final String JSON_NESTED =
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"nested\",\"properties\":{\"nestedField\":{\"type\":\"boolean\"}}}}}";
"{\"_source\":{\"enabled\":false},\"properties\":{\"field\":{\"type\":\"nested\",\"properties\":{\"nestedField\":{\"type\":\"boolean\",\"copy_to\":\"_all\"}}},\"_all\":{\"type\":\"text\"}}}";
}
Loading

0 comments on commit 22029e5

Please sign in to comment.