forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mapping: Default position_offset_gap to 100
This is much more fiddly than you'd expect it to be because of the way position_offset_gap is applied in StringFieldMapper. Instead of setting the default to 100 its simpler to make sure that all the analyzers default to 100 and that StringFieldMapper doesn't override the default unless the user specifies something different. Unless the index was created before 2.1, in which case the old default of 0 has to take. Also postition_offset_gaps less than 0 aren't allowed at all. New tests test that: 1. the new default doesn't match phrases across values with reasonably low slop (5) 2. the new default doest match phrases across values with reasonably high slop (50) 3. you can override the value and phrases work as you'd expect 4. if you leave the value undefined in the mapping and define it on a custom analyzer the the value from the custom analyzer shines through Closes elastic#7268
- Loading branch information
Showing
9 changed files
with
894 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
158 changes: 158 additions & 0 deletions
158
...t/java/org/elasticsearch/index/mapper/string/StringFieldMapperPositionOffsetGapTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.index.mapper.string; | ||
|
||
import com.google.common.collect.ImmutableList; | ||
|
||
import org.elasticsearch.ExceptionsHelper; | ||
import org.elasticsearch.client.Client; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.common.xcontent.XContentFactory; | ||
import org.elasticsearch.index.mapper.MapperParsingException; | ||
import org.elasticsearch.test.ESSingleNodeTestCase; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery; | ||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; | ||
import static org.hamcrest.Matchers.containsString; | ||
|
||
/** | ||
* Tests that position_offset_gap is read from the mapper and applies as | ||
* expected in queries. | ||
*/ | ||
public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCase { | ||
/** | ||
* The default position_offset_gap should be large enough that most | ||
* "sensible" queries phrase slops won't match across values. | ||
*/ | ||
public void testDefault() throws IOException { | ||
assertGapIsOneHundred(client(), "test", "test"); | ||
} | ||
|
||
/** | ||
* Asserts that the post-2.0 default is being applied. | ||
*/ | ||
public static void assertGapIsOneHundred(Client client, String indexName, String type) throws IOException { | ||
testGap(client(), indexName, type, 100); | ||
|
||
// No match across gap using default slop with default positionOffsetGap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 0); | ||
|
||
// Nor with small-ish values | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(5)).get(), 0); | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(50)).get(), 0); | ||
|
||
// But huge-ish values still match | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(500)).get(), 1); | ||
} | ||
|
||
public void testZero() throws IOException { | ||
setupGapInMapping(0); | ||
assertGapIsZero(client(), "test", "test"); | ||
} | ||
|
||
/** | ||
* Asserts that the pre-2.0 default has been applied or explicitly | ||
* configured. | ||
*/ | ||
public static void assertGapIsZero(Client client, String indexName, String type) throws IOException { | ||
testGap(client, indexName, type, 0); | ||
/* | ||
* Phrases match across different values using default slop with pre-2.0 default | ||
* position_offset_gap. | ||
*/ | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 1); | ||
} | ||
|
||
public void testLargerThanDefault() throws IOException { | ||
setupGapInMapping(10000); | ||
testGap(client(), "test", "test", 10000); | ||
} | ||
|
||
public void testSmallerThanDefault() throws IOException { | ||
setupGapInMapping(2); | ||
testGap(client(), "test", "test", 2); | ||
} | ||
|
||
public void testNegativeIsError() throws IOException { | ||
try { | ||
setupGapInMapping(-1); | ||
fail("Expected an error"); | ||
} catch (MapperParsingException e) { | ||
assertThat(ExceptionsHelper.detailedMessage(e), containsString("positions_offset_gap less than 0 aren't allowed")); | ||
} | ||
} | ||
|
||
/** | ||
* Tests that the default actually defaults to the position_offset_gap | ||
* configured in the analyzer. This behavior is very old and a little | ||
* strange but not worth breaking some thought. | ||
*/ | ||
public void testDefaultDefaultsToAnalyzer() throws IOException { | ||
XContentBuilder settings = XContentFactory.jsonBuilder().startObject().startObject("analysis").startObject("analyzer") | ||
.startObject("gappy"); | ||
settings.field("type", "custom"); | ||
settings.field("tokenizer", "standard"); | ||
settings.field("position_offset_gap", 2); | ||
setupAnalyzer(settings, "gappy"); | ||
testGap(client(), "test", "test", 2); | ||
} | ||
|
||
/** | ||
* Build an index named "test" with a field named "string" with the provided | ||
* positionOffsetGap that uses the standard analyzer. | ||
*/ | ||
private void setupGapInMapping(int positionOffsetGap) throws IOException { | ||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string"); | ||
mapping.field("type", "string"); | ||
mapping.field("position_offset_gap", positionOffsetGap); | ||
client().admin().indices().prepareCreate("test").addMapping("test", mapping).get(); | ||
} | ||
|
||
/** | ||
* Build an index named "test" with the provided settings and and a field | ||
* named "string" that uses the specified analyzer and default | ||
* position_offset_gap. | ||
*/ | ||
private void setupAnalyzer(XContentBuilder settings, String analyzer) throws IOException { | ||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string"); | ||
mapping.field("type", "string"); | ||
mapping.field("analyzer", analyzer); | ||
client().admin().indices().prepareCreate("test").addMapping("test", mapping).setSettings(settings).get(); | ||
} | ||
|
||
private static void testGap(Client client, String indexName, String type, int positionOffsetGap) throws IOException { | ||
client.prepareIndex(indexName, type, "position_gap_test").setSource("string", ImmutableList.of("one", "two three")).setRefresh(true).get(); | ||
|
||
// Baseline - phrase query finds matches in the same field value | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "two three")).get(), 1); | ||
|
||
if (positionOffsetGap > 0) { | ||
// No match across gaps when slop < position gap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap - 1)).get(), | ||
0); | ||
} | ||
|
||
// Match across gaps when slop >= position gap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap)).get(), 1); | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap + 1)).get(), 1); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.